From f3a8f1b2b980b828bfa1b746e9f64941e3a9c355 Mon Sep 17 00:00:00 2001
From: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Date: Fri, 3 Dec 2021 20:28:39 +0800
Subject: [PATCH] add yaml guide file; (#4803)

test=document_fix
---
 configs/keypoint/README.md               |   2 +
 docs/tutorials/KeyPointConfigGuide_cn.md | 299 +++++++++++++++++++++++
 docs/tutorials/KeyPointConfigGuide_en.md | 299 +++++++++++++++++++++++
 3 files changed, 600 insertions(+)
 create mode 100644 docs/tutorials/KeyPointConfigGuide_cn.md
 create mode 100644 docs/tutorials/KeyPointConfigGuide_en.md

diff --git a/configs/keypoint/README.md b/configs/keypoint/README.md
index 516565614..98d3e641c 100644
--- a/configs/keypoint/README.md
+++ b/configs/keypoint/README.md
@@ -50,6 +50,8 @@ MPII dataset
 KeyPoint models currently support the [COCO](https://cocodataset.org/#keypoints-2017) and [MPII](http://human-pose.mpi-inf.mpg.de/#overview) datasets; see [Keypoint Data Preparation](../../docs/tutorials/PrepareKeypointDataSet_cn.md) for how to prepare them.
 
+For an explanation of the config file options, see the [Keypoint Config Guide](../../docs/tutorials/KeyPointConfigGuide_cn.md).
+
 - Note that when the Top-Down pipeline is tested with detected boxes, a bbox.json file generated by a detection model is required. For COCO val2017, you can use the results of a [detector with human AP of 56.4 on COCO val2017](https://paddledet.bj.bcebos.com/data/bbox.json): download it to the root directory (PaddleDetection), set `use_gt_bbox: False` in the config file, and then run the test command as usual.
diff --git a/docs/tutorials/KeyPointConfigGuide_cn.md b/docs/tutorials/KeyPointConfigGuide_cn.md
new file mode 100644
index 000000000..9675849c8
--- /dev/null
+++ b/docs/tutorials/KeyPointConfigGuide_cn.md
@@ -0,0 +1,299 @@
+# config yaml options
+
+This document explains the config options used by KeyPoint models, taking [tinypose_256x192.yml](../../configs/keypoint/tiny_pose/tinypose_256x192.yml) as an example.
+
+```yaml
+use_gpu: true                    #whether to train with GPU
+log_iter: 5                      #iter interval for printing logs
+save_dir: output                 #directory to save models
+snapshot_epoch: 10               #epoch interval for saving models
+weights: output/tinypose_256x192/model_final   #weights loaded for testing (without the ".pdparams" suffix)
+epoch: 420                       #total number of training epochs
+num_joints: &num_joints 17       #number of keypoints
+pixel_std: &pixel_std 200        #reference pixel scale used by affine transforms (usually left unchanged)
+metric: KeyPointTopDownCOCOEval  #metric used for evaluation
+num_classes: 1                   #number of classes (used by detection models; can be ignored here)
+train_height: &train_height 256  #height of the model input
+train_width: &train_width 192    #width of the model input
+trainsize: &trainsize [*train_width, *train_height]   #model input size, built from the variables above
+hmsize: &hmsize [48, 64]         #output heatmap size (width, height)
+flip_perm: &flip_perm [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]   #pairs of left/right keypoint ids swapped when the image is flipped, e.g. after flipping, the left wrist becomes the right wrist and vice versa
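+#for reference, in the standard COCO 17-keypoint order these pairs are:
+#0:nose 1:left_eye 2:right_eye 3:left_ear 4:right_ear 5:left_shoulder
+#6:right_shoulder 7:left_elbow 8:right_elbow 9:left_wrist 10:right_wrist
+#11:left_hip 12:right_hip 13:left_knee 14:right_knee 15:left_ankle 16:right_ankle
+#so [1, 2] swaps the eyes, [9, 10] swaps the wrists, and so on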
+
+#####model
+architecture: TopDownHRNet       #model architecture class
+
+TopDownHRNet:                    #TopDownHRNet settings
+  backbone: LiteHRNet            #backbone network
+  post_process: HRNetPostProcess #post-processing class
+  flip_perm: *flip_perm          #same flip_perm as above
+  num_joints: *num_joints        #number of keypoints (number of output channels)
+  width: &width 40               #number of backbone output channels
+  loss: KeyPointMSELoss          #loss function
+  use_dark: true                 #whether to use DarkPose post-processing
+
+LiteHRNet:                       #LiteHRNet settings
+  network_type: wider_naive      #network structure type
+  freeze_at: -1                  #branch id at which gradients are cut; a frozen branch does not back-propagate
+  freeze_norm: false             #whether to freeze normalization-layer parameters
+  return_idx: [0]                #branch id whose features are returned
+
+KeyPointMSELoss:                 #loss settings
+  use_target_weight: true        #whether to use keypoint target weights
+  loss_scale: 1.0                #loss scaling factor; 1.0 leaves the loss unchanged
+
+#####optimizer
+LearningRate:                    #learning-rate settings
+  base_lr: 0.002                 #initial base learning rate
+  schedulers:
+    - !PiecewiseDecay            #decay policy
+      milestones: [380, 410]     #epochs at which the rate decays
+      gamma: 0.1                 #decay factor
+    - !LinearWarmup              #warmup policy
+      start_factor: 0.001        #initial warmup learning rate, as a fraction of base_lr
+      steps: 500                 #number of iters used for warmup
+
+OptimizerBuilder:                #optimizer settings
+  optimizer:
+    type: Adam                   #optimizer type: Adam
+  regularizer:
+    factor: 0.0                  #regularization weight
+    type: L2                     #regularization type, L2/L1
+
+#####data
+TrainDataset:                    #training dataset settings
+  !KeypointTopDownCocoDataset    #dataset loading class
+    image_dir: ""                #image folder, resolved as dataset_dir/image_dir
+    anno_path: aic_coco_train_cocoformat.json   #training annotation JSON, COCO format
+    dataset_dir: dataset         #dataset root; image_dir and anno_path are relative to it
+    num_joints: *num_joints      #number of keypoints, from the variable above
+    trainsize: *trainsize        #training input size, from the variable above
+    pixel_std: *pixel_std        #same pixel_std as above
+    use_gt_bbox: True            #whether to use ground-truth boxes
+
+EvalDataset:                     #evaluation dataset settings
+  !KeypointTopDownCocoDataset    #dataset loading class
+    image_dir: val2017           #image folder
+    anno_path: annotations/person_keypoints_val2017.json   #evaluation annotation JSON, COCO format
+    dataset_dir: dataset/coco    #dataset root; image_dir and anno_path are relative to it
+    num_joints: *num_joints      #number of keypoints, from the variable above
+    trainsize: *trainsize        #training input size, from the variable above
+    pixel_std: *pixel_std        #same pixel_std as above
+    use_gt_bbox: True            #whether to use ground-truth boxes, normally true for evaluation
+    image_thre: 0.5              #score threshold for detected boxes, used when use_gt_bbox is False
+
+TestDataset:                     #test-only dataset settings, no labels
+  !ImageFolder                   #dataset loading class for image folders
+    anno_path: dataset/coco/keypoint_imagelist.txt   #list file of test images
+
+worker_num: 2                    #number of data-loading workers; 2-4 is typical, too many may stall
+global_mean: &global_mean [0.485, 0.456, 0.406]   #global mean variable
+global_std: &global_std [0.229, 0.224, 0.225]     #global std variable
+
+TrainReader:                     #training data loader settings
+  sample_transforms:             #per-sample preprocessing transforms
+    - RandomFlipHalfBodyTransform:   #random flip & random half-body transform
+        scale: 0.25              #maximum scaling ratio
+        rot: 30                  #maximum rotation angle
+        num_joints_half_body: 8  #the half-body transform is skipped when fewer keypoints are visible
+        prob_half_body: 0.3      #probability of the half-body transform (given enough keypoints)
+        pixel_std: *pixel_std    #same pixel_std as above
+        trainsize: *trainsize    #training size, same trainsize as above
+        upper_body_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]   #keypoint ids belonging to the upper body
+        flip_pairs: *flip_perm   #left/right keypoint pairs, same flip_perm as above
+    - AugmentationbyInformantionDropping:   #information-dropping augmentation (AID)
+        prob_cutout: 0.5         #probability of random cutout
+        offset_factor: 0.05      #random jitter of the cutout center, as a fraction of the image width
+        num_patch: 1             #number of cutout patches
+        trainsize: *trainsize    #same trainsize as above
+    - TopDownAffine:
+        trainsize: *trainsize    #same trainsize as above
+        use_udp: true            #whether to use unbiased data processing (UDP), used with flip testing
+    - ToHeatmapsTopDown_DARK:    #generates ground-truth heatmaps
+        hmsize: *hmsize          #heatmap size
+        sigma: 2                 #sigma of the Gaussian kernel
+  batch_transforms:
+    - NormalizeImage:            #image normalization
+        mean: *global_mean       #mean, from the variable above
+        std: *global_std         #std, from the variable above
+        is_scale: true           #whether to divide pixel values by 255, i.e. [0,255] -> [0,1]
+    - Permute: {}                #channel transform HWC -> CHW, almost always required
+  batch_size: 128                #training batch size
+  shuffle: true                  #whether to shuffle the dataset
+  drop_last: false               #whether to drop the last incomplete batch
+
+EvalReader:
+  sample_transforms:             #preprocessing transforms, same meaning as in TrainReader
+    - TopDownAffine:             #affine transform settings
+        trainsize: *trainsize    #same trainsize as above
+        use_udp: true            #whether to use UDP; must match training
+  batch_transforms:
+    - NormalizeImage:            #image normalization; must match training
+        mean: *global_mean
+        std: *global_std
+        is_scale: true
+    - Permute: {}                #channel transform HWC -> CHW
+  batch_size: 16                 #evaluation batch size
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *train_height, *train_width]   #input tensor shape, CHW
+  sample_transforms:
+    - Decode: {}                 #image loading
+    - TopDownEvalAffine:         #affine class used at test time
+        trainsize: *trainsize    #input image size
+    - NormalizeImage:            #input image normalization
+        mean: *global_mean       #mean
+        std: *global_std         #std
+        is_scale: true           #whether to divide pixel values by 255, i.e. [0,255] -> [0,1]
+    - Permute: {}                #channel transform HWC -> CHW
+  batch_size: 1                  #test batch size
+  fuse_normalize: false          #whether to fuse normalization into the exported model (if true, normalize can be dropped from preprocessing, which speeds up the pipeline)
+```
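+
+To train on the standard COCO annotations alone instead of the merged aic_coco list above, only the dataset paths need to change. A minimal sketch (the image_dir and anno_path below assume the standard COCO layout under dataset/coco, not something this config ships with):
+
+```yaml
+TrainDataset:
+  !KeypointTopDownCocoDataset
+    image_dir: train2017                                     #assumed standard COCO image folder
+    anno_path: annotations/person_keypoints_train2017.json   #assumed standard COCO keypoint annotations
+    dataset_dir: dataset/coco
+    num_joints: *num_joints
+    trainsize: *trainsize
+    pixel_std: *pixel_std
+    use_gt_bbox: True
+```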
diff --git a/docs/tutorials/KeyPointConfigGuide_en.md b/docs/tutorials/KeyPointConfigGuide_en.md
new file mode 100644
index 000000000..fa700a28e
--- /dev/null
+++ b/docs/tutorials/KeyPointConfigGuide_en.md
@@ -0,0 +1,299 @@
+# config yaml guide
+
+This guide explains the config options used by KeyPoint models, taking [tinypose_256x192.yml](../../configs/keypoint/tiny_pose/tinypose_256x192.yml) as an example.
+
+```yaml
+use_gpu: true                    #whether to train with GPU
+log_iter: 5                      #print a log line every 5 iters
+save_dir: output                 #directory to save models
+snapshot_epoch: 10               #save the model every 10 epochs
+weights: output/tinypose_256x192/model_final   #the weights to load (without the ".pdparams" suffix)
+epoch: 420                       #total number of epochs to train
+num_joints: &num_joints 17       #number of joints
+pixel_std: &pixel_std 200        #reference pixel scale used by affine transforms (usually left unchanged)
+metric: KeyPointTopDownCOCOEval  #metric function
+num_classes: 1                   #number of classes (used by object detection models; can be ignored here)
+train_height: &train_height 256  #height of the model input
+train_width: &train_width 192    #width of the model input
+trainsize: &trainsize [*train_width, *train_height]   #shape of the model input
+hmsize: &hmsize [48, 64]         #output heatmap size (width, height)
+flip_perm: &flip_perm [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]   #pairs of left/right keypoint ids that swap when the image is flipped, e.g. the left wrist becomes the right wrist and vice versa
+
+#####model
+architecture: TopDownHRNet       #the model architecture
+
+TopDownHRNet:                    #TopDownHRNet configs
+  backbone: LiteHRNet            #backbone to use
+  post_process: HRNetPostProcess #post-process to use
+  flip_perm: *flip_perm          #same flip_perm as above
+  num_joints: *num_joints        #number of joints (number of output channels)
+  width: &width 40               #number of backbone output channels
+  loss: KeyPointMSELoss          #loss function
+  use_dark: true                 #whether to use DarkPose in post-processing
+
+LiteHRNet:                       #LiteHRNet configs
+  network_type: wider_naive      #network type of the backbone
+  freeze_at: -1                  #the branch matching this id does not back-propagate; -1 means all branches are trained
+  freeze_norm: false             #whether to freeze normalization weights
+  return_idx: [0]                #branch id from which features are fetched
+
+KeyPointMSELoss:                 #loss configs
+  use_target_weight: true        #whether to use target weights
+  loss_scale: 1.0                #loss weight; final_loss = loss * loss_scale
+
+#####optimizer
+LearningRate:                    #learning-rate configs
+  base_lr: 0.002                 #the base learning rate
+  schedulers:
+    - !PiecewiseDecay            #scheduler that decays the learning rate
+      milestones: [380, 410]     #the epochs at which the rate is adjusted
+      gamma: 0.1                 #the adjustment ratio; new_lr = lr * gamma
+    - !LinearWarmup              #warmup configs
+      start_factor: 0.001        #the starting ratio with respect to base_lr
+      steps: 500                 #iters used for warmup
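+# (worked example of this schedule: warmup starts at base_lr * start_factor =
+#  0.002 * 0.001 = 2e-6 and rises linearly to 0.002 over the first 500 iters;
+#  PiecewiseDecay then keeps lr at 0.002 until epoch 380, 0.0002 from epoch
+#  380 to 410, and 0.00002 for the remaining epochs up to 420)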
+
+OptimizerBuilder:                #optimizer configs
+  optimizer:
+    type: Adam                   #optimizer type: Adam
+  regularizer:
+    factor: 0.0                  #the regularizer weight
+    type: L2                     #regularizer type: L2/L1
+
+#####data
+TrainDataset:                    #train dataset configs
+  !KeypointTopDownCocoDataset    #the dataset class used to load data
+    image_dir: ""                #the image directory, relative to dataset_dir
+    anno_path: aic_coco_train_cocoformat.json   #the training annotation list, COCO format, relative to dataset_dir
+    dataset_dir: dataset         #the dataset root; image_dir and anno_path are relative to it
+    num_joints: *num_joints      #number of joints
+    trainsize: *trainsize        #the input size of the model
+    pixel_std: *pixel_std        #same pixel_std as above
+    use_gt_bbox: True            #whether to use ground-truth boxes
+
+EvalDataset:                     #eval dataset configs
+  !KeypointTopDownCocoDataset    #the dataset class used to load data
+    image_dir: val2017           #the image directory, relative to dataset_dir
+    anno_path: annotations/person_keypoints_val2017.json   #the eval annotation list, COCO format, relative to dataset_dir
+    dataset_dir: dataset/coco    #the dataset root; image_dir and anno_path are relative to it
+    num_joints: *num_joints      #number of joints
+    trainsize: *trainsize        #the input size of the model
+    pixel_std: *pixel_std        #same pixel_std as above
+    use_gt_bbox: True            #whether to use ground-truth boxes, commonly true in eval
+    image_thre: 0.5              #score threshold for detected boxes, used when use_gt_bbox is False
+
+TestDataset:                     #the test dataset, without labels
+  !ImageFolder                   #the class used to load data, finds images by folder
+    anno_path: dataset/coco/keypoint_imagelist.txt   #the image list file
+
+worker_num: 2                    #number of workers used to load the dataset
+global_mean: &global_mean [0.485, 0.456, 0.406]   #means used to normalize images
+global_std: &global_std [0.229, 0.224, 0.225]     #stds used to normalize images
+
+TrainReader:                     #TrainReader configs
+  sample_transforms:             #transform configs
+    - RandomFlipHalfBodyTransform:   #random flip & random half-body transform
+        scale: 0.25              #maximum scale for the size transform
+        rot: 30                  #maximum rotation for the transform
+        num_joints_half_body: 8  #the half-body transform is skipped when fewer joints than this are found
+        prob_half_body: 0.3      #probability of the half-body transform
+        pixel_std: *pixel_std    #same pixel_std as above
+        trainsize: *trainsize    #the input size of the model
+        upper_body_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]   #the joint ids that belong to the upper body
+        flip_pairs: *flip_perm   #same flip_perm as above
+    - AugmentationbyInformantionDropping:   #information-dropping augmentation (AID)
+        prob_cutout: 0.5         #probability of cutting out a keypoint area
+        offset_factor: 0.05      #jitter of the cutout position, as a fraction of the train width
+        num_patch: 1             #number of areas to cut out
+        trainsize: *trainsize    #same trainsize as above
+    - TopDownAffine:
+        trainsize: *trainsize    #same trainsize as above
+        use_udp: true            #whether to use unbiased data processing (UDP), used with flip eval
+    - ToHeatmapsTopDown_DARK:    #generates ground-truth heatmaps
+        hmsize: *hmsize          #the size of the output heatmaps
+        sigma: 2                 #the sigma of the Gaussian kernel used to generate ground-truth heatmaps
+  batch_transforms:
+    - NormalizeImage:            #image normalization
+        mean: *global_mean       #normalization mean
+        std: *global_std         #normalization std
+        is_scale: true           #whether to scale every pixel by 1/255, i.e. from [0,255] to [0,1]
+    - Permute: {}                #channel transform from HWC to CHW
+  batch_size: 128                #batch size used for training
+  shuffle: true                  #whether to shuffle the images before training
+  drop_last: false               #whether to drop the last images that are not enough for a full batch
+
+EvalReader:
+  sample_transforms:             #transform configs
+    - TopDownAffine:             #affine configs
+        trainsize: *trainsize    #same trainsize as above
+        use_udp: true            #whether to use UDP; must match training
+  batch_transforms:
+    - NormalizeImage:            #image normalization; the values must match those in TrainReader
+        mean: *global_mean
+        std: *global_std
+        is_scale: true
+    - Permute: {}                #channel transform from HWC to CHW
+  batch_size: 16                 #batch size used for evaluation
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *train_height, *train_width]   #the input dimensions used by the model, CHW
+  sample_transforms:
+    - Decode: {}                 #load images
+    - TopDownEvalAffine:         #affine class used at test time
+        trainsize: *trainsize    #the input size of the model
+    - NormalizeImage:            #image normalization; the values must match those in TrainReader
+        mean: *global_mean       #normalization mean
+        std: *global_std         #normalization std
+        is_scale: true           #whether to scale every pixel by 1/255, i.e. from [0,255] to [0,1]
+    - Permute: {}                #channel transform from HWC to CHW
+  batch_size: 1                  #test batch size
+  fuse_normalize: false          #whether to fuse normalization into the model at export time; this speeds up inference
+```
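+
+When exporting for deployment, the fuse_normalize note above suggests a common variant: fuse the normalization into the model and drop NormalizeImage from the test pipeline. A minimal sketch (an assumption about a typical export setup, not this config's default):
+
+```yaml
+TestReader:
+  inputs_def:
+    image_shape: [3, *train_height, *train_width]
+  sample_transforms:
+    - Decode: {}
+    - TopDownEvalAffine:
+        trainsize: *trainsize
+    - Permute: {}              #NormalizeImage removed: its mean/std are fused into the model
+  batch_size: 1
+  fuse_normalize: true         #fold normalization into the exported model to speed up inference
+```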
--
GitLab