update comment of yolov3_mobilenet_v1_roadsign.yml, test=document_fix (#1422)

7ab8629e · cnn · GitHub · 915d5370 · 7ab8629e · 7ab8629e
隐藏空白更改
内联并排

Showing with 147 additions and 188 deletions

configs/yolov3_mobilenet_v1_roadsign.yml configs/yolov3_mobilenet_v1_roadsign.yml +140 -185

docs/tutorials/QUICK_STARTED_cn.md docs/tutorials/QUICK_STARTED_cn.md +7 -3

未找到文件。
--- a/configs/yolov3_mobilenet_v1_roadsign.yml
+++ b/configs/yolov3_mobilenet_v1_roadsign.yml
 #####################################基础配置#####################################
-# 检测算法使用YOLOv3，backbone使用MobileNet_v1，数据集使用roadsign_voc的配置文件模板，本配置文件默认使用单卡，单卡的batch_size=1
+# 检测算法使用YOLOv3，backbone使用MobileNet_v1
 # 检测模型的名称
 architecture: YOLOv3
 # 根据硬件选择是否使用GPU
 use_gpu: true
-# ### max_iters为最大迭代次数，而一个iter会运行batch_size * device_num张图片。batch_size在下面 TrainReader.batch_size设置。
+  # ### max_iters为最大迭代次数，而一个iter会运行batch_size * device_num张图片。batch_size在下面 TrainReader.batch_size设置。
 max_iters: 1200
-# log平滑参数
+# log平滑参数，平滑窗口大小，会从历史窗口中取log_smooth_window大小的loss求平均值
 log_smooth_window: 20
 # 模型保存文件夹
 save_dir: output
@@ -19,25 +19,23 @@ map_type: integral
 # ### pretrain_weights 可以是imagenet的预训练好的分类模型权重，也可以是在VOC或COCO数据集上的预训练的检测模型权重
 # 模型配置文件和权重文件可参考[模型库](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.4/docs/MODEL_ZOO.md)
 pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar
-# best模型保存路径
+# 模型保存文件夹，如果开启了--eval，会在这个文件夹下保存best_model
-weights: output/yolov3_mobilenet_v1_roadsign_voc_template/best_model
+weights: output/yolov3_mobilenet_v1_roadsign_coco_template/
-# ### 根据用户数据设置类别数
+# ### 根据用户数据设置类别数，注意这里不含背景类
 num_classes: 4
 # finetune时忽略的参数，按照正则化匹配，匹配上的参数会被忽略掉
 finetune_exclude_pretrained_params: ['yolo_output']
-# 是否使用ppdet中的YOLOv3Loss，ppdet中的YOLOv3Loss兼容Paddle中yolov3_loss，且可设置参更多参数。
+# use_fine_grained_loss
-# true: 使用ppdet/modeling/losses/yolo_loss.py
-# false:使用Paddle中yolov3_loss，文档：https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/layers_cn/yolov3_loss_cn.html#yolov3-loss
 use_fine_grained_loss: false
 # 检测模型的结构
 YOLOv3:
+  # 默认是 MobileNetv1
  backbone: MobileNet
  yolo_head: YOLOv3Head
 # 检测模型的backbone
 MobileNet:
-  norm_type: sync_bn
  norm_decay: 0.
  conv_group_scale: 1
  with_extra_blocks: false
@@ -50,18 +48,21 @@ YOLOv3Head:
  anchors: [[10, 13], [16, 30], [33, 23],
            [30, 61], [62, 45], [59, 119],
            [116, 90], [156, 198], [373, 326]]
-  # norm_decay
-  norm_decay: 0.
  # yolo_loss
  yolo_loss: YOLOv3Loss
  # nms 类型参数，可以设置为[MultiClassNMS, MultiClassSoftNMS, MatrixNMS], 默认使用 MultiClassNMS
  nms:
+    # background_label，背景标签（类别）的索引，如果设置为 0 ，则忽略背景标签（类别）。如果设置为 -1 ，则考虑所有类别。默认值：0
    background_label: -1
+    # NMS步骤后每个图像要保留的总bbox数。 -1表示在NMS步骤之后保留所有bbox。
    keep_top_k: 100
-    # nms阈值
+    # 在NMS中用于剔除检测框IOU的阈值，默认值：0.3 。
    nms_threshold: 0.45
+    # 基于 score_threshold 的过滤检测后，根据置信度保留的最大检测次数。
    nms_top_k: 1000
+    # 是否归一化，默认值：True 。
    normalized: false
+    # 过滤掉低置信度分数的边界框的阈值。
    score_threshold: 0.01
 YOLOv3Loss:
@@ -77,10 +78,10 @@ LearningRate:
  # ### 学习率设置 参考 https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.4/docs/FAQ.md#faq%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98
  # base_lr
  base_lr: 0.0001
-  # 学习率规划器
+  # 学习率调整策略
  # 具体实现参考[API](fluid.layers.piecewise_decay)
  schedulers:
-  # 学习率衰减策略
+  # 学习率调整策略
  - !PiecewiseDecay
    gamma: 0.1
    milestones:
@@ -112,236 +113,190 @@ OptimizerBuilder:
 # 模型训练集设置参考
 # 训练、验证、测试使用的数据配置主要区别在数据路径、模型输入、数据增强参数设置
-# 如果使用 yolov3_reader.yml，下面的参数设置优先级高，会覆盖yolov3_reader.yml中的参数设置，对于用自定义数据建议将数据配置文件写到下面。
+# 如果使用 yolov3_reader.yml，下面的参数设置优先级高，会覆盖yolov3_reader.yml中的参数设置。
 # _READER_: 'yolov3_reader.yml'
 TrainReader:
  # 训练过程中模型的输入设置
  # 包括图片，图片长宽高等基本信息，图片id，标记的目标框，类别等信息
-  # 不同算法，不同数据集 inputs_def 不同，有的算法需要限制输入图像尺寸，有的不需要###
  inputs_def:
-    # YOLO 输入图像大小，必须是32的整数倍###
-    # 注意需要与下面的图像尺寸的设置保存一致###
-    image_shape: [3, 608, 608]
-    # 不同算法，不同数据集 fields 不同###
-    # YOLO系列 VOC格式数据： ['image', 'gt_bbox', 'gt_class', 'gt_score']，且需要设置num_max_boxes
-    # YOLO系列 COCO格式数据：['image', 'gt_bbox', 'gt_class', 'gt_score']，且需要设置num_max_boxes
-    # FasterRCNN 系列 COCO格式数据：['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
-    # MaskRCNN 系列 COCO格式数据：['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_mask']
-    # AnchorFree 系列 COCO格式数据：['image', 'im_id', 'gt_bbox', 'gt_class', 'tl_heatmaps', 'br_heatmaps', 'tl_regrs', 'br_regrs', 'tl_tags', 'br_tags', 'tag_masks']
-    # VOC数据格式需要读取的字段，注意与COCO不同。注意TrainReader、EvalReader、TestReader字段略有不同
    fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
+    # num_max_boxes，每个样本的ground truth的最多保留个数，若不够用0填充。
-    # 仅YOLO系列需要设置
    num_max_boxes: 50
  # 训练数据集路径
  dataset:
    # 指定数据集格式
    !VOCDataSet
-    #dataset/xxx/
+      #dataset/xxx/
-    #├── annotations
+      #├── annotations
-    #│   ├── xxx1.xml
+      #│   ├── xxx1.xml
-    #│   ├── xxx2.xml
+      #│   ├── xxx2.xml
-    #│   ├── xxx3.xml
+      #│   ├── xxx3.xml
-    #│   |   ...
+      #│   |   ...
-    #├── images
+      #├── images
-    #│   ├── xxx1.png
+      #│   ├── xxx1.png
-    #│   ├── xxx2.png
+      #│   ├── xxx2.png
-    #│   ├── xxx3.png
+      #│   ├── xxx3.png
-    #│   |   ...
+      #│   |   ...
-    #├── label_list.txt (用户自定义必须提供，且文件名称必须是label_list.txt。当使用VOC数据且use_default_label=true时，可不提供 )
+      #├── label_list.txt (用户自定义必须提供，且文件名称必须是label_list.txt。当使用VOC数据且use_default_label=true时，可不提供 )
-    #├── train.txt (训练数据集文件列表, ./images/xxx1.png ./Annotations/xxx1.xml)
+      #├── train.txt (训练数据集文件列表, ./images/xxx1.png ./Annotations/xxx1.xml)
-    #└── valid.txt (测试数据集文件列表)
+      #└── valid.txt (测试数据集文件列表)
+      # 数据集相对路径（dataset_dir），图像路径= dataset_dir + image_dir + image_name
-    # 数据集相对路径
+      dataset_dir: dataset/roadsign_voc
-    dataset_dir: dataset/roadsign_voc
+      # 标记文件名
-    # 标记文件名
+      anno_path: train.txt
-    anno_path: train.txt
+      # 是否包含背景类，若with_background=true，num_classes需要+1
+      # YOLO 系列with_background必须是false，FasterRCNN系列是true ###
-    # 对于VOC、COCO等比赛数据集，可以不指定类别标签文件，use_default_label可以是true。
+      with_background: false
-    # 对于用户自定义数据，如果是VOC格式数据，use_default_label必须要设置成false，且需要提供label_list.txt。如果是COCO格式数据，不需要设置这个参数。
-    use_default_label: false
-    # 是否包含背景类，若with_background=true，num_classes需要+1
-    # YOLO 系列with_background必须是false，FasterRCNN系列是true ###
-    with_background: false
-  # 1个GPU的batch size，默认为1。需要注意：每个iter迭代会运行batch_size * device_num张图片
-  batch_size: 8
-  # 共享内存bufsize，若内存有限，请设置小一些。
-  bufsize: 2
-  # 选择是否打乱所有样本的顺序
-  shuffle: true
-  # drop_empty 建议设置为true
-  drop_empty: true
-  # drop_last 如果最后一个batch的图片数量为奇数，选择是否丢掉这个batch不进行训练。
-  # 注意，在某些情况下，drop_last=false时训练过程中可能会出错，建议训练时都设置为true
-  drop_last: true
-  # mixup_epoch
-  mixup_epoch: -1
-  # 选择是否使用多进程，默认为false
-  use_process: false
-  # 若选用多进程，设置使用多进程/线程的数目，默认为4，建议与CPU核数一致
-  # 开启多进程后，占用内存会成倍增加，根据内存设置###
-  worker_num: 4
-  # 数据预处理和数据增强部分，此部分设置要特别注意###
-  # 不同算法对数据的预处理流程不同，建议使用对应算法默认的数据处理流程。
-  # 比如，YOLO、FPN算法，要求输入图像尺寸必须是32的整数倍
-  # 以下是对一个batch中的每单张图片做的数据增强
  sample_transforms:
-  # 读取Image图像为numpy数组
+    # 读取Image图像为numpy数组
-  # 可以选择将图片从BGR转到RGB，可以选择对一个batch中的图片做mixup增强
+    # 可以选择将图片从BGR转到RGB，可以选择对一个batch中的图片做mixup增强
-  - !DecodeImage
+    - !DecodeImage
-    to_rgb: true
+      to_rgb: True
-    with_mixup: false
+      with_mixup: True
-  # box 坐标归一化，仅仅YOLO系列算法需要
+    # MixupImage
-  - !NormalizeBox {}
+    - !MixupImage
-  # 以prob概率随机反转
+      alpha: 1.5
-  - !RandomFlipImage
+      beta: 1.5
-    is_normalized: true
+    # ColorDistort
-    prob: 0.5
+    - !ColorDistort {}
-  # 归一化
+    # RandomExpand
+    - !RandomExpand
+      fill_value: [123.675, 116.28, 103.53]
+      # 随机扩充比例，默认值是4.0
+      ratio: 1.5
+    - !RandomCrop {}
+    - !RandomFlipImage
+      is_normalized: false
+    # 归一化坐标
+    - !NormalizeBox {}
+    # 如果 bboxes 数量小于 num_max_boxes，填充值为0的 box
+    - !PadBox
+      num_max_boxes: 50
+    # 坐标格式转化，从XYXY转成XYWH格式
+    - !BboxXYXY2XYWH {}
+  # 以下是对一个batch中的所有图片同时做的数据处理
+  batch_transforms:
+  # 多尺度训练时，从list中随机选择一个尺寸，对一个batch数据同时resize
+  - !RandomShape
+    sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
+    random_inter: True
+  # NormalizeImage
  - !NormalizeImage
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
-    is_scale: true
+    is_scale: True
    is_channel_first: false
-  # 如果 bboxes 数量小于 num_max_boxes，填充值为0的 box，仅仅YOLO系列算法需要
-  - !PadBox
-    num_max_boxes: 50
-  # 坐标格式转化，从XYXY转成XYWH，仅仅YOLO系列算法需要
-  - !BboxXYXY2XYWH {}
-  # 以下是对一个batch中的所有图片同时做的数据增强
-  batch_transforms:
-  # 多尺度训练时，从list中随机选择一个尺寸，对一个batch数据同时同时resize
-  - !RandomShape
-    sizes: [608]
-  # channel_first
  - !Permute
-    channel_first: true
    to_bgr: false
+    channel_first: True
+  # Gt2YoloTarget is only used when use_fine_grained_loss set as true,
+  # this operator will be deleted automatically if use_fine_grained_loss
+  # is set as false
+  - !Gt2YoloTarget
+    anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+    anchors: [[10, 13], [16, 30], [33, 23],
+              [30, 61], [62, 45], [59, 119],
+              [116, 90], [156, 198], [373, 326]]
+    downsample_ratios: [32, 16, 8]
+  # 1个GPU的batch size，默认为1。需要注意：每个iter迭代会运行batch_size * device_num张图片
+  batch_size: 8
+  # 是否shuffle
+  shuffle: true
+  # mixup，-1表示不做Mixup数据增强。注意，这里是epoch为单位
+  mixup_epoch: 250
+  # 注意，在某些情况下，drop_last=false时训练过程中可能会出错，建议训练时都设置为true
+  drop_last: true
+  # 若选用多进程，设置使用多进程/线程的数目
+  # 开启多进程后，占用内存会成倍增加，根据内存设置###
+  worker_num: 4
+  # 共享内存bufsize。注意，缓存是以batch为单位，缓存的样本数据总量为batch_size * bufsize，所以请注意不要设置太大，请根据您的硬件设置。
+  bufsize: 2
+  # 是否使用多进程
+  use_process: true
 EvalReader:
  # 评估过程中模型的输入设置
-  # 1个GPU的batch size，默认为1。需要注意：每个iter迭代会运行batch_size * device_num张图片
+  # 包括图片，图片长宽高等基本信息，图片id，标记的目标框，类别等信息
-  batch_size: 1
-  # 共享内存bufsize，共享内存中训练样本数量是： bufsize * batch_size * 2 张图
-  bufsize: 1
-  # shuffle=false
-  shuffle: false
-  # 一般的评估时，batch_size=1，drop_empty可设置成 false
-  drop_empty: false
-  # 一般的评估时，batch_size=1，drop_last可设置成 false
-  drop_last: false
-  # 选择是否使用多进程，默认为false
-  use_process: false
-  # 若选用多进程，设置使用多进程/线程的数目，默认为4，建议与CPU核数一致
-  # 开启多进程后，占用内存会成倍增加，根据内存设置 ###
-  worker_num: 1
  inputs_def:
-    # 图像尺寸与上保持一致
-    image_shape: [3, 608, 608]
-    # fields 字段
    fields: ['image', 'im_size', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
+    # num_max_boxes，每个样本的ground truth的最多保留个数，若不够用0填充。
    num_max_boxes: 50
+  # 数据集路径
-  # 评估数据集路径
  dataset:
    !VOCDataSet
-    dataset_dir: dataset/roadsign_voc
+      # 数据集相对路径（dataset_dir），图像路径= dataset_dir + image_dir + image_name
-    # 评估文件列表
+      dataset_dir: dataset/roadsign_voc
-    anno_path: valid.txt
+      # 评估文件列表
+      anno_path: valid.txt
-    # 对于VOC、COCO等比赛数据集，可以不指定类别标签文件，use_default_label可以是true。
+      # 是否包含背景类，若with_background=true，num_classes需要+1
-    # 对于用户自定义数据，如果是VOC格式数据，use_default_label必须要设置成false，且需要提供label_list.txt。如果是COCO格式数据，不需要设置这个参数。
+      # YOLO 系列with_background必须是false，FasterRCNN系列是true ###
-    use_default_label: false
+      with_background: false
-    # 是否包含背景类，若with_background=true，num_classes需要+1
-    # YOLO 系列with_background必须是false，FasterRCNN系列是true ###
-    with_background: false
-  # 单张图的 transforms
  sample_transforms:
-    # DecodeImage
+    # 读取Image图像为numpy数组
+    # 可以选择将图片从BGR转到RGB，可以选择对一个batch中的图片做mixup增强
    - !DecodeImage
-      to_rgb: true
+      to_rgb: True
+    # ResizeImage
-    # 与上面图像尺寸保持一致 ###
    - !ResizeImage
      target_size: 608
      interp: 2
-    # 图像归一化
+    # NormalizeImage
    - !NormalizeImage
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
-      is_scale: true
+      is_scale: True
      is_channel_first: false
    # 如果 bboxes 数量小于 num_max_boxes，填充值为0的 box
    - !PadBox
      num_max_boxes: 50
    - !Permute
      to_bgr: false
-      channel_first: true
+      channel_first: True
+  # 1个GPU的batch size，默认为1。需要注意：每个iter迭代会运行batch_size * device_num张图片
-TestReader:
+  batch_size: 8
-  # 测试过程中模型的输入设置
+  # drop_empty
-  # 预测时 batch_size设置为1
-  batch_size: 1
-  # 一般的预测时，batch_size=1，drop_empty可设置成 false
  drop_empty: false
-  # 一般的预测时，batch_size=1，drop_last可设置成 false
+  # 若选用多进程，设置使用多进程/线程的数目
-  drop_last: false
+  # 开启多进程后，占用内存会成倍增加，根据内存设置###
+  worker_num: 4
+  # 共享内存bufsize。注意，缓存是以batch为单位，缓存的样本数据总量为batch_size * bufsize，所以请注意不要设置太大，请根据您的硬件设置。
+  bufsize: 2
+TestReader:
+  # 预测过程中模型的输入设置
+  # 包括图片，图片长宽高等基本信息，图片id，标记的目标框，类别等信息
  inputs_def:
-    # 预测时输入图像尺寸，与上面图像尺寸保持一致
+    # 预测图像输入尺寸
    image_shape: [3, 608, 608]
-    # 预测时需要读取字段
-    # fields 字段
    fields: ['image', 'im_size', 'im_id']
+  # 数据集路径
  dataset:
    # 预测数据
    !ImageFolder
-    anno_path: dataset/roadsign_voc/label_list.txt
+      # anno_path
+      anno_path: dataset/roadsign_voc/label_list.txt
-    # 对于VOC、COCO等比赛数据集，可以不指定类别标签文件，use_default_label可以是true。
+      # 是否包含背景类，若with_background=true，num_classes需要+1
-    # 对于用户自定义数据，如果是VOC格式数据，use_default_label必须要设置成false，且需要提供label_list.txt。如果是COCO格式数据，不需要设置这个参数。
+      # YOLO 系列with_background必须是false，FasterRCNN系列是true ###
-    use_default_label: false
+      with_background: false
-    # 是否包含背景类，若with_background=true，num_classes需要+1
-    # YOLO 系列with_background必须是false，FasterRCNN系列是true ###
-    with_background: false
-  # 单张图的 transforms
  sample_transforms:
-    # DecodeImage
    - !DecodeImage
-      to_rgb: true
+      to_rgb: True
-    # 注意与上面图像尺寸保持一致
+    # ResizeImage
    - !ResizeImage
+      # 注意与上面图像尺寸保持一致
      target_size: 608
      interp: 2
    # NormalizeImage
    - !NormalizeImage
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
-      is_scale: true
+      is_scale: True
      is_channel_first: false
    # Permute
    - !Permute
      to_bgr: false
-      channel_first: true
+      channel_first: True
+  # 1个GPU的batch size，默认为1
+  batch_size: 1
--- a/docs/tutorials/QUICK_STARTED_cn.md
+++ b/docs/tutorials/QUICK_STARTED_cn.md
@@ -29,6 +29,8 @@ python dataset/roadsign_voc/download_roadsign_voc.py
 # -c 参数表示指定使用哪个配置文件
 # -o 参数表示指定配置文件中的全局变量（覆盖配置文件中的设置），这里设置使用gpu，
 # --eval 参数表示边训练边评估，会自动保存一个评估结果最好的名为best_model.pdmodel的模型
 python tools/train.py -c configs/yolov3_mobilenet_v1_roadsign.yml --eval -o use_gpu=true
 ```
@@ -36,7 +38,7 @@ python tools/train.py -c configs/yolov3_mobilenet_v1_roadsign.yml --eval -o use_
 **但注意VisualDL需Python>=3.5**
-首先安装VisualDL
+首先安装[VisualDL](https://github.com/PaddlePaddle/VisualDL)
 ```
 python -m pip install visualdl -i https://mirror.baidu.com/pypi/simple
 ```
@@ -48,9 +50,9 @@ python -u tools/train.py -c configs/yolov3_mobilenet_v1_roadsign.yml \
                        --eval
 ```
 通过visualdl命令实时查看变化曲线：
+```
 visualdl --logdir vdl_dir/scalar/ --host <host_IP> --port <port_num>
+```
 ### 2、评估
 ```
@@ -67,6 +69,8 @@ python tools/eval.py -c configs/yolov3_mobilenet_v1_roadsign.yml-o use_gpu=true
 # -o 参数表示指定配置文件中的全局变量（覆盖配置文件中的设置）
 # --infer_img 参数指定预测图像路径
 # 预测结束后会在output文件夹中生成一张画有预测结果的同名图像
 python tools/infer.py -c configs/yolov3_mobilenet_v1_roadsign.yml -o use_gpu=true --infer_img=demo/road554.png
 ```