From 30a40a3a612037dbd5ed9271ae011a91877d563c Mon Sep 17 00:00:00 2001 From: cnn Date: Wed, 31 Mar 2021 12:13:01 +0800 Subject: [PATCH] [doc] add config annotation doc (#2451) * add config annotation doc, test=document_fix * update doc of config * mlp_dim-->out_channel of TwoFCHead --- .../faster_rcnn_r50_fpn_1x_coco_annotation.md | 263 +++++++++++++++++ .../ppyolo_r50vd_dcn_1x_coco_annotation.md | 266 ++++++++++++++++++ 2 files changed, 529 insertions(+) create mode 100644 docs/tutorials/config_annotation/faster_rcnn_r50_fpn_1x_coco_annotation.md create mode 100644 docs/tutorials/config_annotation/ppyolo_r50vd_dcn_1x_coco_annotation.md diff --git a/docs/tutorials/config_annotation/faster_rcnn_r50_fpn_1x_coco_annotation.md b/docs/tutorials/config_annotation/faster_rcnn_r50_fpn_1x_coco_annotation.md new file mode 100644 index 000000000..460af362b --- /dev/null +++ b/docs/tutorials/config_annotation/faster_rcnn_r50_fpn_1x_coco_annotation.md @@ -0,0 +1,263 @@ +# RCNN系列模型参数配置教程 + +标签: 模型参数配置 + +以`faster_rcnn_r50_fpn_1x_coco.yml`为例,这个模型由五个子配置文件组成: + +- 数据配置文件 `coco_detection.yml` + +```yaml +# 数据评估类型 +metric: COCO +# 数据集的类别数 +num_classes: 80 + +# TrainDataset +TrainDataset: + !COCODataSet + # 图像数据路径,相对 dataset_dir 路径,os.path.join(dataset_dir, image_dir) + image_dir: train2017 + # 标注文件路径,相对 dataset_dir 路径,os.path.join(dataset_dir, anno_path) + anno_path: annotations/instances_train2017.json + # 数据文件夹 + dataset_dir: dataset/coco + # data_fields + data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] + +EvalDataset: + !COCODataSet + # 图像数据路径,相对 dataset_dir 路径,os.path.join(dataset_dir, image_dir) + image_dir: val2017 + # 标注文件路径,相对 dataset_dir 路径,os.path.join(dataset_dir, anno_path) + anno_path: annotations/instances_val2017.json + # 数据文件夹 + dataset_dir: dataset/coco + +TestDataset: + !ImageFolder + # 标注文件路径,相对 dataset_dir 路径,os.path.join(dataset_dir, anno_path) + anno_path: annotations/instances_val2017.json +``` + +- 优化器配置文件 `optimizer_1x.yml` + +```yaml +# 总训练轮数 
+epoch: 12 + +# 学习率设置 +LearningRate: + # 默认为8卡训练学习率 + base_lr: 0.01 + # 学习率调整策略 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + # 学习率变化位置(轮数) + milestones: [8, 11] + - !LinearWarmup + start_factor: 0.1 + steps: 1000 + +# 优化器 +OptimizerBuilder: + # 优化器 + optimizer: + momentum: 0.9 + type: Momentum + # 正则化 + regularizer: + factor: 0.0001 + type: L2 +``` + +- 数据读取配置文件 `faster_fpn_reader.yml` + +```yaml +# 每张GPU reader进程个数 +worker_num: 2 +# 训练数据 +TrainReader: + # 训练数据transforms + sample_transforms: + - Decode: {} + - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} + - RandomFlip: {prob: 0.5} + - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - Permute: {} + batch_transforms: + # 由于模型存在FPN结构,输入图片需要padding为32的倍数 + - PadBatch: {pad_to_stride: 32} + # 训练时batch_size + batch_size: 1 + # 读取数据时是否乱序 + shuffle: true + # 是否丢弃最后不能完整组成batch的数据 + drop_last: true + # 表示reader是否对gt进行组batch的操作,在rcnn系列算法中设置为false,得到的gt格式为list[Tensor] + collate_batch: false + +# 评估数据 +EvalReader: + # 评估数据transforms + sample_transforms: + - Decode: {} + - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} + - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - Permute: {} + batch_transforms: + # 由于模型存在FPN结构,输入图片需要padding为32的倍数 + - PadBatch: {pad_to_stride: 32} + # 评估时batch_size + batch_size: 1 + # 读取数据时是否乱序 + shuffle: false + # 是否丢弃最后不能完整组成batch的数据 + drop_last: false + # 是否丢弃没有标注的数据 + drop_empty: false + +# 测试数据 +TestReader: + # 测试数据transforms + sample_transforms: + - Decode: {} + - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} + - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - Permute: {} + batch_transforms: + # 由于模型存在FPN结构,输入图片需要padding为32的倍数 + - PadBatch: {pad_to_stride: 32} + # 测试时batch_size + batch_size: 1 + # 读取数据时是否乱序 + shuffle: false + # 
是否丢弃最后不能完整组成batch的数据 + drop_last: false +``` + +- 模型配置文件 `faster_rcnn_r50_fpn.yml` + +```yaml +# 模型结构类型 +architecture: FasterRCNN +# 预训练模型地址 +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams + +# FasterRCNN +FasterRCNN: + # backbone + backbone: ResNet + # neck + neck: FPN + # rpn_head + rpn_head: RPNHead + # bbox_head + bbox_head: BBoxHead + # post process + bbox_post_process: BBoxPostProcess + + +# backbone +ResNet: + # depth + depth: 50 + # norm_type,可设置参数:bn 或 sync_bn + norm_type: bn + # freeze_at index, 0 represents res2 + freeze_at: 0 + # return_idx, index 0 stands for res2 + return_idx: [0,1,2,3] + # num_stages + num_stages: 4 + +# FPN +FPN: + # channel of FPN + out_channel: 256 + +# RPNHead +RPNHead: + # anchor generator + anchor_generator: + aspect_ratios: [0.5, 1.0, 2.0] + anchor_sizes: [[32], [64], [128], [256], [512]] + strides: [4, 8, 16, 32, 64] + # rpn_target_assign + rpn_target_assign: + batch_size_per_im: 256 + fg_fraction: 0.5 + negative_overlap: 0.3 + positive_overlap: 0.7 + use_random: True + # 训练时生成proposal的参数 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 1000 + topk_after_collect: True + # 评估时生成proposal的参数 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +# BBoxHead +BBoxHead: + # TwoFCHead as BBoxHead + head: TwoFCHead + # roi align + roi_extractor: + resolution: 7 + sampling_ratio: 0 + aligned: True + # bbox_assigner + bbox_assigner: BBoxAssigner + +# BBoxAssigner +BBoxAssigner: + # batch_size_per_im + batch_size_per_im: 512 + # 背景阈值 + bg_thresh: 0.5 + # 前景阈值 + fg_thresh: 0.5 + # 前景比例 + fg_fraction: 0.25 + # 是否随机采样 + use_random: True + +# TwoFCHead +TwoFCHead: + # TwoFCHead特征维度 + out_channel: 1024 + + +# BBoxPostProcess +BBoxPostProcess: + # 解码 + decode: RCNNBox + # nms + nms: + # 使用MultiClassNMS + name: MultiClassNMS + keep_top_k: 100 + score_threshold: 0.05 + nms_threshold: 0.5 + +``` + +- 运行时配置文件 
`runtime.yml` + +```yaml +# 是否使用gpu +use_gpu: true +# 日志打印间隔 +log_iter: 20 +# save_dir +save_dir: output +# 模型保存间隔时间 +snapshot_epoch: 1 +``` diff --git a/docs/tutorials/config_annotation/ppyolo_r50vd_dcn_1x_coco_annotation.md b/docs/tutorials/config_annotation/ppyolo_r50vd_dcn_1x_coco_annotation.md new file mode 100644 index 000000000..9c7985fd2 --- /dev/null +++ b/docs/tutorials/config_annotation/ppyolo_r50vd_dcn_1x_coco_annotation.md @@ -0,0 +1,266 @@ +# YOLO系列模型参数配置教程 + +标签: 模型参数配置 + +以`ppyolo_r50vd_dcn_1x_coco.yml`为例,这个模型由五个子配置文件组成: + +- 数据配置文件 `coco_detection.yml` + +```yaml +# 数据评估类型 +metric: COCO +# 数据集的类别数 +num_classes: 80 + +# TrainDataset +TrainDataset: + !COCODataSet + # 图像数据路径,相对 dataset_dir 路径,os.path.join(dataset_dir, image_dir) + image_dir: train2017 + # 标注文件路径,相对 dataset_dir 路径,os.path.join(dataset_dir, anno_path) + anno_path: annotations/instances_train2017.json + # 数据文件夹 + dataset_dir: dataset/coco + # data_fields + data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] + +EvalDataset: + !COCODataSet + # 图像数据路径,相对 dataset_dir 路径,os.path.join(dataset_dir, image_dir) + image_dir: val2017 + # 标注文件路径,相对 dataset_dir 路径,os.path.join(dataset_dir, anno_path) + anno_path: annotations/instances_val2017.json + # 数据文件夹 + dataset_dir: dataset/coco + +TestDataset: + !ImageFolder + # 标注文件路径,相对 dataset_dir 路径 + anno_path: annotations/instances_val2017.json +``` + +- 优化器配置文件 `optimizer_1x.yml` + +```yaml +# 总训练轮数 +epoch: 405 + +# 学习率设置 +LearningRate: + # 默认为8卡训练学习率 + base_lr: 0.01 + # 学习率调整策略 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + # 学习率变化位置(轮数) + milestones: + - 243 + - 324 + # Warmup + - !LinearWarmup + start_factor: 0. 
+ steps: 4000 + +# 优化器 +OptimizerBuilder: + # 优化器 + optimizer: + momentum: 0.9 + type: Momentum + # 正则化 + regularizer: + factor: 0.0005 + type: L2 +``` + +- 数据读取配置文件 `ppyolo_reader.yml` + +```yaml +# 每张GPU reader进程个数 +worker_num: 2 +# 训练数据 +TrainReader: + inputs_def: + num_max_boxes: 50 + # 训练数据transforms + sample_transforms: + - Decode: {} + - Mixup: {alpha: 1.5, beta: 1.5} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomCrop: {} + - RandomFlip: {} + # batch_transforms + batch_transforms: + - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeBox: {} + - PadBox: {num_max_boxes: 50} + - BboxXYXY2XYWH: {} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]} + # 训练时batch_size + batch_size: 24 + # 读取数据时是否乱序 + shuffle: true + # 是否丢弃最后不能完整组成batch的数据 + drop_last: true + # mixup_epoch,大于最大epoch,表示训练过程一直使用mixup数据增广 + mixup_epoch: 25000 + # 是否通过共享内存进行数据读取加速,需要保证共享内存大小(如/dev/shm)满足大于1G + use_shared_memory: true + +# 评估数据 +EvalReader: + # 评估数据transforms + sample_transforms: + - Decode: {} + - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + # 评估时batch_size + batch_size: 8 + # 是否丢弃没有标注的数据 + drop_empty: false + +# 测试数据 +TestReader: + inputs_def: + image_shape: [3, 608, 608] + # 测试数据transforms + sample_transforms: + - Decode: {} + - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + # 测试时batch_size + batch_size: 1 +``` + +- 
模型配置文件 `ppyolo_r50vd_dcn.yml` + +```yaml +# 模型结构类型 +architecture: YOLOv3 +# 预训练模型地址 +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams +# norm_type +norm_type: sync_bn +# 是否使用ema +use_ema: true +# ema_decay +ema_decay: 0.9998 + +# YOLOv3 +YOLOv3: + # backbone + backbone: ResNet + # neck + neck: PPYOLOFPN + # yolo_head + yolo_head: YOLOv3Head + # post_process + post_process: BBoxPostProcess + + +# backbone +ResNet: + # depth + depth: 50 + # variant + variant: d + # return_idx, 0 represents res2 + return_idx: [1, 2, 3] + # dcn_v2_stages + dcn_v2_stages: [3] + # freeze_at + freeze_at: -1 + # freeze_norm + freeze_norm: false + # norm_decay + norm_decay: 0. + +# PPYOLOFPN +PPYOLOFPN: + # 是否coord_conv + coord_conv: true + # 是否drop_block + drop_block: true + # block_size + block_size: 3 + # keep_prob + keep_prob: 0.9 + # 是否spp + spp: true + +# YOLOv3Head +YOLOv3Head: + # anchors + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + # anchor_masks + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + # loss + loss: YOLOv3Loss + # 是否使用iou_aware + iou_aware: true + # iou_aware_factor + iou_aware_factor: 0.4 + +# YOLOv3Loss +YOLOv3Loss: + # ignore_thresh + ignore_thresh: 0.7 + # downsample + downsample: [32, 16, 8] + # 是否label_smooth + label_smooth: false + # scale_x_y + scale_x_y: 1.05 + # iou_loss + iou_loss: IouLoss + # iou_aware_loss + iou_aware_loss: IouAwareLoss + +# IouLoss +IouLoss: + loss_weight: 2.5 + loss_square: true + +# IouAwareLoss +IouAwareLoss: + loss_weight: 1.0 + +# BBoxPostProcess +BBoxPostProcess: + decode: + name: YOLOBox + conf_thresh: 0.01 + downsample_ratio: 32 + clip_bbox: true + scale_x_y: 1.05 + # nms 配置 + nms: + name: MatrixNMS + keep_top_k: 100 + score_threshold: 0.01 + post_threshold: 0.01 + nms_top_k: -1 + background_label: -1 + +``` + +- 运行时配置文件 `runtime.yml` + +```yaml +# 是否使用gpu +use_gpu: true +# 日志打印间隔 +log_iter: 20 +# save_dir 
+save_dir: output +# 模型保存间隔时间 +snapshot_epoch: 1 +``` -- GitLab