# SSD detector with a MobileNet backbone, evaluated with the VOC metric.
# Top-level scalar keys are global run settings; each capitalised section
# below configures the component of the same name.

architecture: SSD
# Archive name indicates COCO-pretrained SSD-MobileNet-v1 weights.
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_coco_pretrained.tar
use_gpu: true
max_iters: 28000
snapshot_iter: 2000
log_smooth_window: 1
metric: VOC
map_type: 11point
save_dir: output
weights: output/ssd_mobilenet_v1_voc/model_final/
# 20(label_class) + 1(background)
num_classes: 21

# `backbone` and `multi_box_head` name the top-level sections defined below.
SSD:
  backbone: MobileNet
  multi_box_head: MultiBoxHead
  output_decoder:
    background_label: 0
    keep_top_k: 200
    nms_eta: 1.0
    nms_threshold: 0.45
    nms_top_k: 400
    score_threshold: 0.01

MobileNet:
  norm_decay: 0.
  conv_group_scale: 1
  conv_learning_rate: 0.1
  # Four [a, b] filter pairs, used because with_extra_blocks is true.
  extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
  with_extra_blocks: true

# aspect_ratios, max_sizes and min_sizes each carry six entries.
MultiBoxHead:
  aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
  base_size: 300
  flip: true
  max_ratio: 90
  # NOTE(review): the first entry is an empty list while the rest are
  # scalars -- presumably "no max size" for the first entry; confirm
  # against the MultiBoxHead implementation before changing.
  max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0]
  min_ratio: 20
  min_sizes: [60.0, 105.0, 150.0, 195.0, 240.0, 285.0]
  offset: 0.5

LearningRate:
  schedulers:
    # Five rates for the four milestone boundaries.
    - !PiecewiseDecay
      milestones: [10000, 15000, 20000, 25000]
      values: [0.001, 0.0005, 0.00025, 0.0001, 0.00001]

OptimizerBuilder:
  optimizer:
    momentum: 0.0
    type: RMSPropOptimizer
  regularizer:
    factor: 0.00005
    type: L2

# Training input: VOC trainval list with random augmentation transforms.
TrainReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields: ['image', 'gt_bbox', 'gt_class']
  dataset: !VOCDataSet
    anno_path: trainval.txt
    dataset_dir: dataset/voc
    use_default_label: true
  # Transforms are listed in the order given by the original config.
  sample_transforms:
    - !DecodeImage
      to_rgb: true
    - !RandomDistort
      brightness_lower: 0.875
      brightness_upper: 1.125
      is_order: true
    - !RandomExpand
      fill_value: [127.5, 127.5, 127.5]
    - !RandomCrop
      allow_no_crop: false
    - !NormalizeBox {}
    - !ResizeImage
      interp: 1
      target_size: 300
      use_cv2: false
    - !RandomFlipImage
      is_normalized: true
    - !Permute {}
    # Same mean/std triple is used by all three readers.
    - !NormalizeImage
      is_scale: false
      mean: [127.5, 127.5, 127.5]
      std: [127.502231, 127.502231, 127.502231]
  batch_size: 32
  shuffle: true
  drop_last: true
  worker_num: 8
  bufsize: 16
  use_process: true

# Evaluation input: VOC test list, no random transforms; requests the extra
# 'im_shape', 'im_id' and 'is_difficult' fields on top of the training ones.
EvalReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields:
      ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']
  dataset: !VOCDataSet
    anno_path: test.txt
    dataset_dir: dataset/voc
    use_default_label: true
  sample_transforms:
    - !DecodeImage
      to_rgb: true
    - !NormalizeBox {}
    - !ResizeImage
      interp: 1
      target_size: 300
      use_cv2: false
    - !Permute {}
    - !NormalizeImage
      is_scale: false
      mean: [127.5, 127.5, 127.5]
      std: [127.502231, 127.502231, 127.502231]
  batch_size: 32
  worker_num: 8
  bufsize: 32
  use_process: false

# Inference input: image-folder dataset, one image per batch, no box fields.
TestReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields: ['image', 'im_id', 'im_shape']
  dataset: !ImageFolder
    anno_path: test.txt
    use_default_label: true
  sample_transforms:
    - !DecodeImage
      to_rgb: true
    - !ResizeImage
      interp: 1
      max_size: 0
      target_size: 300
      use_cv2: false
    - !Permute {}
    - !NormalizeImage
      is_scale: false
      mean: [127.5, 127.5, 127.5]
      std: [127.502231, 127.502231, 127.502231]
  batch_size: 1