diff --git a/configs/dcn/cascade_rcnn_dcn_r101_vd_fpn_1x.yml b/configs/dcn/cascade_rcnn_dcn_r101_vd_fpn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..d59a5394dd4b8a2a80f07e601a7ab53cceb6ea5d --- /dev/null +++ b/configs/dcn/cascade_rcnn_dcn_r101_vd_fpn_1x.yml @@ -0,0 +1,139 @@ +architecture: CascadeRCNN +train_feed: FasterRCNNTrainFeed +eval_feed: FasterRCNNEvalFeed +test_feed: FasterRCNNTestFeed +max_iters: 90000 +snapshot_iter: 10000 +use_gpu: true +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar +weights: output/cascade_rcnn_dcn_r101_vd_fpn_1x/model_final +metric: COCO +num_classes: 81 + +CascadeRCNN: + backbone: ResNet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: CascadeBBoxHead + bbox_assigner: CascadeBBoxAssigner + +ResNet: + norm_type: bn + depth: 101 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + variant: d + dcn_stages: [3, 4, 5] + +FPN: + min_level: 2 + max_level: 6 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + min_level: 2 + max_level: 6 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_positive_overlap: 0.7 + rpn_negative_overlap: 0.3 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + min_level: 2 + max_level: 5 + box_resolution: 7 + sampling_ratio: 2 + +CascadeBBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [10, 20, 30] + bg_thresh_lo: [0.0, 0.0, 0.0] + bg_thresh_hi: [0.5, 0.6, 0.7] + fg_thresh: [0.5, 0.6, 0.7] + fg_fraction: 0.25 + +CascadeBBoxHead: + head: FC6FC7Head + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +FC6FC7Head: + num_chan: 1024 + +LearningRate: + base_lr: 0.02 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [60000, 80000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +FasterRCNNTrainFeed: + batch_size: 2 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_train2017.json + image_dir: train2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + drop_last: false + num_workers: 2 + +FasterRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + +FasterRCNNTestFeed: + batch_size: 1 + dataset: + annotation: dataset/coco/annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + drop_last: false + num_workers: 2 diff --git a/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x.yml b/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..5777438465f53535cc14a595f1dd83faded250ea --- /dev/null +++ b/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x.yml @@ -0,0 +1,139 @@ +architecture: CascadeRCNN +train_feed: FasterRCNNTrainFeed +eval_feed: FasterRCNNEvalFeed +test_feed: FasterRCNNTestFeed +max_iters: 90000 +snapshot_iter: 10000 +use_gpu: true +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar +weights: output/cascade_rcnn_dcn_r50_fpn_1x/model_final +metric: COCO +num_classes: 81 + +CascadeRCNN: + backbone: ResNet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: CascadeBBoxHead + bbox_assigner: CascadeBBoxAssigner + +ResNet: + norm_type: bn + depth: 50 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + variant: b + dcn_stages: [3, 4, 5] + +FPN: + min_level: 2 + max_level: 6 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + min_level: 2 + max_level: 6 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_positive_overlap: 0.7 + rpn_negative_overlap: 0.3 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + min_level: 2 + max_level: 5 + box_resolution: 7 + sampling_ratio: 2 + +CascadeBBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [10, 20, 30] + bg_thresh_lo: [0.0, 0.0, 0.0] + bg_thresh_hi: [0.5, 0.6, 0.7] + fg_thresh: [0.5, 0.6, 0.7] + fg_fraction: 0.25 + +CascadeBBoxHead: + head: FC6FC7Head + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +FC6FC7Head: + num_chan: 1024 + +LearningRate: + base_lr: 0.02 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [60000, 80000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +FasterRCNNTrainFeed: + batch_size: 2 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_train2017.json + image_dir: train2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + drop_last: false + num_workers: 2 + +FasterRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + +FasterRCNNTestFeed: + batch_size: 1 + dataset: + annotation: dataset/coco/annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + drop_last: false + num_workers: 2 diff --git a/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml b/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..7e246e13be55bc7add3c040e2c35b2cf32f79d51 --- /dev/null +++ b/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml @@ -0,0 +1,141 @@ +architecture: CascadeRCNN +train_feed: FasterRCNNTrainFeed +eval_feed: FasterRCNNEvalFeed +test_feed: FasterRCNNTestFeed +max_iters: 90000 +snapshot_iter: 10000 +use_gpu: true +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar +weights: output/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x/model_final +metric: COCO +num_classes: 81 + +CascadeRCNN: + backbone: ResNeXt + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: CascadeBBoxHead + bbox_assigner: CascadeBBoxAssigner + +ResNeXt: + norm_type: bn + depth: 101 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + group_width: 4 + groups: 64 + variant: d + dcn_stages: [3, 4, 5] + +FPN: + min_level: 2 + max_level: 6 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] + +FPNRPNHead: + anchor_generator: + anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + min_level: 2 + max_level: 6 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_positive_overlap: 0.7 + rpn_negative_overlap: 0.3 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + min_level: 2 + max_level: 5 + box_resolution: 7 + sampling_ratio: 2 + +CascadeBBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [10, 20, 30] + bg_thresh_lo: [0.0, 0.0, 0.0] + bg_thresh_hi: [0.5, 0.6, 0.7] + fg_thresh: [0.5, 0.6, 0.7] + fg_fraction: 0.25 + +CascadeBBoxHead: + head: FC6FC7Head + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +FC6FC7Head: + num_chan: 1024 + +LearningRate: + base_lr: 0.02 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [60000, 80000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +FasterRCNNTrainFeed: + batch_size: 2 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_train2017.json + image_dir: train2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + drop_last: false + num_workers: 2 + +FasterRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + +FasterRCNNTestFeed: + batch_size: 1 + dataset: + annotation: dataset/coco/annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + drop_last: false + num_workers: 2 diff --git a/configs/retinanet_x101_vd_64x4d_fpn_1x.yml b/configs/retinanet_x101_vd_64x4d_fpn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..6cc33fafa485c4a64cefef67ec0d6ab0b237db84 --- /dev/null +++ b/configs/retinanet_x101_vd_64x4d_fpn_1x.yml @@ -0,0 +1,108 @@ +architecture: RetinaNet +train_feed: FasterRCNNTrainFeed +eval_feed: FasterRCNNEvalFeed +test_feed: FasterRCNNTestFeed +max_iters: 180000 +use_gpu: true +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar +weights: output/retinanet_x101_vd_64x4d_fpn_1x/model_final +log_smooth_window: 20 +log_iter: 20 +snapshot_iter: 30000 +metric: COCO +save_dir: output +num_classes: 81 + +RetinaNet: + backbone: ResNeXt + fpn: FPN + retina_head: RetinaHead + +ResNeXt: + depth: 101 + feature_maps: [3, 4, 5] + freeze_at: 2 + group_width: 4 + groups: 64 + norm_type: bn + variant: d + +FPN: + max_level: 7 + min_level: 3 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125] + has_extra_convs: true + +RetinaHead: + num_convs_per_octave: 4 + num_chan: 256 + max_level: 7 + min_level: 3 + prior_prob: 0.01 + base_scale: 4 + num_scales_per_octave: 3 + anchor_generator: + aspect_ratios: [1.0, 2.0, 0.5] + variance: [1.0, 1.0, 1.0, 1.0] + target_assign: + positive_overlap: 0.5 + negative_overlap: 0.4 + gamma: 2.0 + alpha: 0.25 + sigma: 3.0151134457776365 + output_decoder: + score_thresh: 0.05 + nms_thresh: 0.5 + pre_nms_top_n: 1000 + detections_per_im: 100 + nms_eta: 1.0 + +LearningRate: + base_lr: 0.005 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +FasterRCNNTrainFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_train2017.json + image_dir: train2017 + batch_transforms: + - !PadBatch + pad_to_stride: 128 + num_workers: 2 + +FasterRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 128 + num_workers: 2 + +FasterRCNNTestFeed: + batch_size: 1 + dataset: + annotation: dataset/coco/annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 128 + num_workers: 2 diff --git a/docs/MODEL_ZOO.md b/docs/MODEL_ZOO.md index f418656a9ac3ad34b0ec5cb812b3a05e91cd6fa8..13b1e4d9e008ffebeb085bdcf3330d8f43e908ff 100644 --- a/docs/MODEL_ZOO.md +++ b/docs/MODEL_ZOO.md @@ -9,7 +9,7 @@ ## Common settings -- All models below except SSD were trained on `coco_2017_train`, and tested on `coco_2017_val`. +- All models below were trained on `coco_2017_train`, and tested on `coco_2017_val`. - Batch Normalization layers in backbones are replaced by Affine Channel layers. - Unless otherwise noted, all ResNet backbones adopt the [ResNet-B](https://arxiv.org/pdf/1812.01187) variant.. - For RCNN and RetinaNet models, only horizontal flipping data augmentation was used in the training phase and no augmentations were used in the testing phase. @@ -70,6 +70,9 @@ The backbone models pretrained on ImageNet are available. All backbone models ar | ResNet50-vd-FPN | Mask | c3-c5 | 1 | 2x | 42.9 | 38.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r50_vd_fpn_2x.tar) | | ResNet101-vd-FPN | Mask | c3-c5 | 1 | 1x | 44.6 | 39.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r101_vd_fpn_1x.tar) | | ResNeXt101-vd-FPN | Mask | c3-c5 | 1 | 1x | 46.2 | 40.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | +| ResNet50-FPN | Cascade Faster | c3-c5 | 2 | 1x | 44.2 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r50_fpn_1x.tar) | +| ResNet101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | 46.4 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r101_vd_fpn_1x.tar) | +| ResNeXt101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | 47.3 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | #### Notes: - Deformable ConvNets v2(dcn_v2) reference from [Deformable ConvNets v2](https://arxiv.org/abs/1811.11168). @@ -111,16 +114,18 @@ results of image size 608/416/320 above. ### RetinaNet -| Backbone | Image/gpu | Lr schd | Box AP | Download | -| :----------- | :-----: | :-----: | :----: | :-------: | -| ResNet50-FPN | 2 | 1x | 36.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r50_fpn_1x.tar) | -| ResNet101-FPN | 2 | 1x | 37.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r101_fpn_1x.tar) | +| Backbone | Image/gpu | Lr schd | Box AP | Download | +| :---------------: | :-----: | :-----: | :----: | :-------: | +| ResNet50-FPN | 2 | 1x | 36.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r50_fpn_1x.tar) | +| ResNet101-FPN | 2 | 1x | 37.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r101_fpn_1x.tar) | +| ResNeXt101-vd-FPN | 1 | 1x | 40.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_x101_vd_64x4d_fpn_1x.tar) | **Notes:** In RetinaNet, the base LR is changed to 0.01 for minibatch size 16. ### SSD | Backbone | Size | Image/gpu | Lr schd | Box AP | Download | +| :----------: | :--: | :-------: | :-----: | :----: | :-------: | | VGG16 | 300 | 8 | 40w | 25.1 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_300.tar) | | VGG16 | 512 | 8 | 40w | 29.1 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_512.tar) | diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md index 7cb3982f39c12326b04194483437565b1f1d4caf..93d7ab3f540380beb5697cec423d64abfa87518e 100644 --- a/docs/MODEL_ZOO_cn.md +++ b/docs/MODEL_ZOO_cn.md @@ -10,7 +10,7 @@ ## 通用设置 -- SSD模型在VOC数据集中训练和测试,其余所有模型均在COCO17数据集中训练和测试。 +- 所有模型均在COCO17数据集中训练和测试。 - 除非特殊说明,所有ResNet骨干网络采用[ResNet-B](https://arxiv.org/pdf/1812.01187)结构。 - 对于RCNN和RetinaNet系列模型,训练阶段仅使用水平翻转作为数据增强,测试阶段不使用数据增强。 @@ -70,6 +70,9 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型 | ResNet50-vd-FPN | Mask | c3-c5 | 1 | 2x | 42.9 | 38.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r50_vd_fpn_2x.tar) | | ResNet101-vd-FPN | Mask | c3-c5 | 1 | 1x | 44.6 | 39.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r101_vd_fpn_1x.tar) | | ResNeXt101-vd-FPN | Mask | c3-c5 | 1 | 1x | 46.2 | 40.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | +| ResNet50-FPN | Cascade Faster | c3-c5 | 2 | 1x | 44.2 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r50_fpn_1x.tar) | +| ResNet101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | 46.4 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r101_vd_fpn_1x.tar) | +| ResNeXt101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | 47.3 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | #### 注意事项: - Deformable卷积网络v2(dcn_v2)参考自论文[Deformable ConvNets v2](https://arxiv.org/abs/1811.11168). @@ -108,16 +111,18 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型 ### RetinaNet -| 骨架网络 | 每张GPU图片个数 | 学习率策略 | Box AP | 下载 | -| :----------- | :-----: | :-----: | :----: | :-------: | -| ResNet50-FPN | 2 | 1x | 36.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r50_fpn_1x.tar) | -| ResNet101-FPN | 2 | 1x | 37.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r101_fpn_1x.tar) | +| 骨架网络 | 每张GPU图片个数 | 学习率策略 | Box AP | 下载 | +| :---------------: | :-----: | :-----: | :----: | :-------: | +| ResNet50-FPN | 2 | 1x | 36.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r50_fpn_1x.tar) | +| ResNet101-FPN | 2 | 1x | 37.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r101_fpn_1x.tar) | +| ResNeXt101-vd-FPN | 1 | 1x | 40.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_x101_vd_64x4d_fpn_1x.tar) | **注意事项:** RetinaNet系列模型中,在总batch size为16下情况下,初始学习率改为0.01。 ### SSD | 骨架网络 | 输入尺寸 | 每张GPU图片个数 | 学习率策略 | Box AP | 下载 | +| :----------: | :--: | :-------: | :-----: | :----: | :-------: | | VGG16 | 300 | 8 | 40万 | 25.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_300.tar) | | VGG16 | 512 | 8 | 40万 | 29.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_512.tar) |