diff --git a/configs/keypoint/README.md b/configs/keypoint/README.md
index 5165656145f6c7b8f42dd41aebe80bc5e07d9ea5..98d3e641cb757d981389ac12c7ff1e750d5b974a 100644
--- a/configs/keypoint/README.md
+++ b/configs/keypoint/README.md
@@ -50,6 +50,8 @@ MPII dataset
 	The KeyPoint models currently support the [COCO](https://cocodataset.org/#keypoints-2017) and [MPII](http://human-pose.mpi-inf.mpg.de/#overview) datasets. For dataset preparation, see [Keypoint Data Preparation](../../docs/tutorials/PrepareKeypointDataSet_cn.md).
+	For a description of the options in the config files, see the [Keypoint Config Guide](../../docs/tutorials/KeyPointConfigGuide_cn.md).
+
 - Note that when a Top-Down model is evaluated with detected bounding boxes, a bbox.json file generated by a detection model is required. Detection results on COCO val2017 are available from [Detector having human AP of 56.4 on COCO val2017 dataset](https://paddledet.bj.bcebos.com/data/bbox.json); download it into the repository root (PaddleDetection), set `use_gt_bbox: False` in the config file, and then run the evaluation command as usual (see the sketch below).
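+
+  For example, a minimal evaluation sketch (the config and weight paths follow the tinypose example in the guide; adjust them to your own setup):
+
+  ```shell
+  # assumes bbox.json sits in the PaddleDetection root and use_gt_bbox: False is set in the config
+  CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/keypoint/tiny_pose/tinypose_256x192.yml -o weights=output/tinypose_256x192/model_final
+  ```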
diff --git a/docs/tutorials/KeyPointConfigGuide_cn.md b/docs/tutorials/KeyPointConfigGuide_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..9675849c8aef7564df4a944690ac39df7296b4c4
--- /dev/null
+++ b/docs/tutorials/KeyPointConfigGuide_cn.md
@@ -0,0 +1,299 @@
+# Config YAML guide
+
+This document explains the config file options used by the KeyPoint models, taking [tinypose_256x192.yml](../../configs/keypoint/tiny_pose/tinypose_256x192.yml) as an example.
+
+```yaml
+use_gpu: true          #whether to train on GPU
+log_iter: 5            #interval (in iters) between log prints
+save_dir: output       #directory for saved models
+snapshot_epoch: 10     #interval (in epochs) between model snapshots
+weights: output/tinypose_256x192/model_final    #model path loaded for evaluation (without the ".pdparams" suffix)
+epoch: 420             #total number of training epochs
+num_joints: &num_joints 17     #number of keypoints
+pixel_std: &pixel_std 200      #reference pixel scale used in transforms (can be left as-is)
+metric: KeyPointTopDownCOCOEval    #evaluation metric class
+num_classes: 1         #number of classes (used by detection models; can be ignored here)
+train_height: &train_height 256    #variable for the model input height
+train_width: &train_width 192      #variable for the model input width
+trainsize: &trainsize [*train_width, *train_height]    #model input size, built from the variables above
+hmsize: &hmsize [48, 64]           #output heatmap size (width, height)
+flip_perm: &flip_perm [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]    #pairs of left/right keypoints swapped when the image is flipped; e.g. after a flip the left wrist becomes the right wrist and vice versa
+
+#####model
+architecture: TopDownHRNet    #model architecture class
+
+TopDownHRNet:                       #TopDownHRNet settings
+  backbone: LiteHRNet               #backbone network
+  post_process: HRNetPostProcess    #post-processing class
+  flip_perm: *flip_perm             #same flip_perm as above
+  num_joints: *num_joints           #number of keypoints (number of output channels)
+  width: &width 40                  #number of backbone output channels
+  loss: KeyPointMSELoss             #loss function
+  use_dark: true                    #whether to use DarkPose post-processing
+
+LiteHRNet:                          #LiteHRNet settings
+  network_type: wider_naive         #network structure type
+  freeze_at: -1                     #id of the branch whose gradients are stopped; a frozen branch does not backpropagate
+  freeze_norm: false                #whether to freeze normalization-layer parameters
+  return_idx: [0]                   #ids of the branches whose features are returned
+
+KeyPointMSELoss:                    #loss settings
+  use_target_weight: true           #whether to use per-keypoint weights
+  loss_scale: 1.0                   #loss scaling factor; 1.0 means unchanged
+
+#####optimizer
+LearningRate:                       #learning-rate settings
+  base_lr: 0.002                    #initial base learning rate
+  schedulers:
+  - !PiecewiseDecay                 #decay schedule
+    milestones: [380, 410]          #epochs at which the learning rate decays
+    gamma: 0.1                      #decay factor
+  - !LinearWarmup                   #warmup schedule
+    start_factor: 0.001             #initial warmup lr as a fraction of base_lr
+    steps: 500                      #number of warmup iters
+
+OptimizerBuilder:                   #optimizer settings
+  optimizer:
+    type: Adam                      #Adam optimizer
+  regularizer:
+    factor: 0.0                     #regularization weight
+    type: L2                        #regularizer type, L2/L1
+
+#####data
+TrainDataset:                       #training dataset settings
+  !KeypointTopDownCocoDataset       #dataset loading class
+    image_dir: ""                   #image folder, relative to dataset_dir
+    anno_path: aic_coco_train_cocoformat.json    #training annotation JSON file, COCO format
+    dataset_dir: dataset            #dataset root; image_dir and anno_path are relative to it
+    num_joints: *num_joints         #number of keypoints, from the variable above
+    trainsize: *trainsize           #training input size, from the variable above
+    pixel_std: *pixel_std           #same pixel_std as above
+    use_gt_bbox: True               #whether to use ground-truth boxes
+
+EvalDataset:                        #evaluation dataset settings
+  !KeypointTopDownCocoDataset       #dataset loading class
+    image_dir: val2017              #image folder
+    anno_path: annotations/person_keypoints_val2017.json    #evaluation annotation JSON file, COCO format
+    dataset_dir: dataset/coco       #dataset root; image_dir and anno_path are relative to it
+    num_joints: *num_joints         #number of keypoints, from the variable above
+    trainsize: *trainsize           #training input size, from the variable above
+    pixel_std: *pixel_std           #same pixel_std as above
+    use_gt_bbox: True               #whether to use ground-truth boxes; usually true for evaluation
+    image_thre: 0.5                 #detection box score threshold, used when use_gt_bbox is False
+
+TestDataset:                        #unlabeled test dataset settings
+  !ImageFolder                      #dataset loading class, reads an image folder
+    anno_path: dataset/coco/keypoint_imagelist.txt    #test image list file
+
+worker_num: 2                       #number of data-loading workers; 2-4 is typical, too many may stall
+
+global_mean: &global_mean [0.485, 0.456, 0.406]    #variable for the global mean
+global_std: &global_std [0.229, 0.224, 0.225]      #variable for the global std
+
+TrainReader:                        #training data loader settings
+  sample_transforms:                #per-sample transforms
+  - RandomFlipHalfBodyTransform:    #random flip & random half-body transform
+      scale: 0.25                   #maximum scaling ratio
+      rot: 30                       #maximum rotation angle
+      num_joints_half_body: 8       #the half-body transform is skipped when fewer keypoints than this are visible
+      prob_half_body: 0.3           #probability of the half-body transform (given enough keypoints)
+      pixel_std: *pixel_std         #same pixel_std as above
+      trainsize: *trainsize         #training size, same trainsize as above
+      upper_body_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]    #ids of the upper-body keypoints
+      flip_pairs: *flip_perm        #left/right keypoint pairs, same flip_perm as above
+  - AugmentationbyInformantionDropping:
+      prob_cutout: 0.5              #probability of the random cutout transform
+      offset_factor: 0.05           #random jitter of the cutout centers, as a fraction of the image width
+      num_patch: 1                  #number of cutout patches
+      trainsize: *trainsize         #same trainsize as above
+  - TopDownAffine:
+      trainsize: *trainsize         #same trainsize as above
+      use_udp: true                 #whether to use unbiased data processing (UDP; used with the flip test)
+  - ToHeatmapsTopDown_DARK:         #generates the ground-truth heatmaps
+      hmsize: *hmsize               #heatmap size
+      sigma: 2                      #sigma of the Gaussian kernel
+  batch_transforms:
+  - NormalizeImage:                 #image normalization
+      mean: *global_mean            #mean, from the variable above
+      std: *global_std              #std, from the variable above
+      is_scale: true                #whether to divide pixels by 255, mapping [0,255] to [0,1]
+  - Permute: {}                     #channel transform HWC -> CHW; almost always required
+  batch_size: 128                   #training batch size
+  shuffle: true                     #whether to shuffle the dataset
+  drop_last: false                  #whether to drop the last incomplete batch
+
+EvalReader:
+  sample_transforms:                #per-sample transforms, same meanings as in TrainReader
+  - TopDownAffine:                  #affine transform settings
+      trainsize: *trainsize         #same trainsize as above
+      use_udp: true                 #whether to use UDP; must match training
+  batch_transforms:
+  - NormalizeImage:                 #image normalization; must match training
+      mean: *global_mean
+      std: *global_std
+      is_scale: true
+  - Permute: {}                     #channel transform HWC -> CHW
+  batch_size: 16                    #evaluation batch size
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *train_height, *train_width]    #input dimensions, CHW
+  sample_transforms:
+  - Decode: {}                      #image loading
+  - TopDownEvalAffine:              #affine transform used at test time
+      trainsize: *trainsize         #input image size
+  - NormalizeImage:                 #input image normalization
+      mean: *global_mean            #mean
+      std: *global_std              #std
+      is_scale: true                #whether to divide pixels by 255, mapping [0,255] to [0,1]
+  - Permute: {}                     #channel transform HWC -> CHW
+  batch_size: 1                     #test batch size
+  fuse_normalize: false             #whether to fuse normalization into the model at export time (if true, normalize can be dropped from preprocessing, speeding up the pipeline)
+```
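+
+As a usage sketch (assuming a PaddleDetection checkout with the dataset prepared as above), training with this config can be launched like:
+
+```shell
+# single-GPU training; -o can override config options on the command line if needed
+CUDA_VISIBLE_DEVICES=0 python3 tools/train.py -c configs/keypoint/tiny_pose/tinypose_256x192.yml
+```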
diff --git a/docs/tutorials/KeyPointConfigGuide_en.md b/docs/tutorials/KeyPointConfigGuide_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..fa700a28ec6752bf47e0589800226f9bbc2912f3
--- /dev/null
+++ b/docs/tutorials/KeyPointConfigGuide_en.md
@@ -0,0 +1,299 @@
+# Config YAML guide
+
+This guide explains the KeyPoint config options, taking [tinypose_256x192.yml](../../configs/keypoint/tiny_pose/tinypose_256x192.yml) as an example.
+
+```yaml
+use_gpu: true          #whether to train with GPU
+log_iter: 5            #print a log line every 5 iters
+save_dir: output       #the directory in which to save models
+snapshot_epoch: 10     #save the model every 10 epochs
+weights: output/tinypose_256x192/model_final    #the weights to load (without the ".pdparams" suffix)
+epoch: 420             #the total number of training epochs
+num_joints: &num_joints 17     #number of joints
+pixel_std: &pixel_std 200      #the reference pixel scale used in transforms (can be left as-is)
+metric: KeyPointTopDownCOCOEval    #metric function
+num_classes: 1         #number of classes (only for object detection; can be ignored here)
+train_height: &train_height 256    #the height of the model input
+train_width: &train_width 192      #the width of the model input
+trainsize: &trainsize [*train_width, *train_height]    #the shape of the model input
+hmsize: &hmsize [48, 64]           #the shape of the model output
+flip_perm: &flip_perm [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]    #the correspondence between left and right keypoint ids; for example, the left wrist becomes the right wrist after an image flip, and vice versa
+
+#####model
+architecture: TopDownHRNet    #the model architecture
+
+TopDownHRNet:                       #TopDownHRNet configs
+  backbone: LiteHRNet               #which backbone to use
+  post_process: HRNetPostProcess    #the post-process to use
+  flip_perm: *flip_perm             #same as the flip_perm above
+  num_joints: *num_joints           #the number of joints (the number of output channels)
+  width: &width 40                  #the number of backbone output channels
+  loss: KeyPointMSELoss             #the loss function
+  use_dark: true                    #whether to use DarkPose post-processing
+
+LiteHRNet:                          #LiteHRNet configs
+  network_type: wider_naive         #the network type of the backbone
+  freeze_at: -1                     #the branch with this id does not backpropagate; -1 means all branches backpropagate
+  freeze_norm: false                #whether to freeze the normalization weights
+  return_idx: [0]                   #the branch ids from which to fetch features
+
+KeyPointMSELoss:                    #loss configs
+  use_target_weight: true           #whether to use target weights
+  loss_scale: 1.0                   #loss weight; final loss = loss * loss_scale
+
+#####optimizer
+LearningRate:                       #learning-rate configs
+  base_lr: 0.002                    #the base learning rate
+  schedulers:
+  - !PiecewiseDecay                 #the scheduler that adjusts the learning rate
+    milestones: [380, 410]          #the epochs at which the learning rate is adjusted
+    gamma: 0.1                      #the adjustment ratio; new_lr = lr * gamma
+  - !LinearWarmup                   #warmup configs
+    start_factor: 0.001             #the initial ratio with respect to base_lr
+    steps: 500                      #the number of iters used for warmup
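+
+# As a concrete illustration of the schedule above: the learning rate warms up
+# linearly from 0.002 * 0.001 = 0.000002 to 0.002 over the first 500 iters,
+# stays at 0.002 until epoch 380, drops to 0.002 * 0.1 = 0.0002 there, and
+# drops again to 0.00002 at epoch 410.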
+
+OptimizerBuilder:                   #optimizer configs
+  optimizer:
+    type: Adam                      #optimizer type: Adam
+  regularizer:
+    factor: 0.0                     #the regularizer weight
+    type: L2                        #regularizer type: L2/L1
+
+#####data
+TrainDataset:                       #train dataset configs
+  !KeypointTopDownCocoDataset       #the dataset class used to load data
+    image_dir: ""                   #the image directory, relative to dataset_dir
+    anno_path: aic_coco_train_cocoformat.json    #the training annotation file, COCO format, relative to dataset_dir
+    dataset_dir: dataset            #the dataset directory; image_dir and anno_path are relative to it
+    num_joints: *num_joints         #the number of joints
+    trainsize: *trainsize           #the input size of the model
+    pixel_std: *pixel_std           #same as the pixel_std above
+    use_gt_bbox: True               #whether to use gt bboxes
+
+EvalDataset:                        #eval dataset configs
+  !KeypointTopDownCocoDataset       #the dataset class used to load data
+    image_dir: val2017              #the image directory, relative to dataset_dir
+    anno_path: annotations/person_keypoints_val2017.json    #the eval annotation file, COCO format, relative to dataset_dir
+    dataset_dir: dataset/coco       #the dataset directory; image_dir and anno_path are relative to it
+    num_joints: *num_joints         #the number of joints
+    trainsize: *trainsize           #the input size of the model
+    pixel_std: *pixel_std           #same as the pixel_std above
+    use_gt_bbox: True               #whether to use gt bboxes; commonly true for eval
+    image_thre: 0.5                 #the score threshold for detected boxes, used when use_gt_bbox is False
+
+TestDataset:                        #the test dataset, without labels
+  !ImageFolder                      #the class used to load data; finds images via a list file
+    anno_path: dataset/coco/keypoint_imagelist.txt    #the image list file
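+
+# The image list file holds one image path per line; for illustration (the
+# exact entries depend on your dataset layout), a line might look like:
+# dataset/coco/val2017/000000397133.jpg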
+
+worker_num: 2                       #the number of workers used to load data
+
+global_mean: &global_mean [0.485, 0.456, 0.406]    #the means used to normalize images
+global_std: &global_std [0.229, 0.224, 0.225]      #the stds used to normalize images
+
+TrainReader:                        #TrainReader configs
+  sample_transforms:                #transform configs
+  - RandomFlipHalfBodyTransform:    #random flip & random half-body transform
+      scale: 0.25                   #the maximum scale for the size transform
+      rot: 30                       #the maximum rotation for the transform
+      num_joints_half_body: 8       #the half-body transform is skipped when fewer joints than this are found
+      prob_half_body: 0.3           #the probability of the half-body transform
+      pixel_std: *pixel_std         #same as the pixel_std above
+      trainsize: *trainsize         #the input size of the model
+      upper_body_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]    #the ids of the joints that belong to the upper body
+      flip_pairs: *flip_perm        #same as the flip_perm above
+  - AugmentationbyInformantionDropping:
+      prob_cutout: 0.5              #the probability of applying cutout
+      offset_factor: 0.05           #the jitter offset of the cutout position, as a fraction of the train width
+      num_patch: 1                  #the number of areas to cut out
+      trainsize: *trainsize         #same as the trainsize above
+  - TopDownAffine:
+      trainsize: *trainsize         #same as the trainsize above
+      use_udp: true                 #whether to use unbiased data processing (UDP; used with the flip eval)
+  - ToHeatmapsTopDown_DARK:         #generates the gt heatmaps
+      hmsize: *hmsize               #the size of the output heatmaps
+      sigma: 2                      #the sigma of the Gaussian kernel used to generate the gt heatmaps
+  batch_transforms:
+  - NormalizeImage:                 #image normalization
+      mean: *global_mean            #the mean for normalization
+      std: *global_std              #the std for normalization
+      is_scale: true                #whether to scale every pixel by 1/255, mapping [0,255] to [0,1]
+  - Permute: {}                     #channel transform from HWC to CHW
+  batch_size: 128                   #the batch size used for training
+  shuffle: true                     #whether to shuffle the images before training
+  drop_last: false                  #whether to drop the last batch when it is smaller than batch_size
+
+EvalReader:
+  sample_transforms:                #transform configs
+  - TopDownAffine:                  #affine configs
+      trainsize: *trainsize         #same as the trainsize above
+      use_udp: true                 #whether to use UDP; must match training
+  batch_transforms:
+  - NormalizeImage:                 #image normalization; the values must match those in TrainReader
+      mean: *global_mean
+      std: *global_std
+      is_scale: true
+  - Permute: {}                     #channel transform from HWC to CHW
+  batch_size: 16                    #the batch size used for evaluation
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *train_height, *train_width]    #the input dimensions used by the model, CHW
+  sample_transforms:
+  - Decode: {}                      #load the image
+  - TopDownEvalAffine:              #the affine class used at test time
+      trainsize: *trainsize         #the input size of the model
+  - NormalizeImage:                 #image normalization; the values must match those in TrainReader
+      mean: *global_mean            #the mean for normalization
+      std: *global_std              #the std for normalization
+      is_scale: true                #whether to scale every pixel by 1/255, mapping [0,255] to [0,1]
+  - Permute: {}                     #channel transform from HWC to CHW
+  batch_size: 1                     #the batch size used for testing
+  fuse_normalize: false             #whether to fuse normalization into the model when exporting it; this speeds up inference
+```
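+
+As a usage sketch (the weight path is illustrative), the config is used with the standard PaddleDetection tools, e.g. to export an inference model:
+
+```shell
+# set fuse_normalize: true under TestReader in the config first if you want
+# normalization fused into the exported model
+python3 tools/export_model.py -c configs/keypoint/tiny_pose/tinypose_256x192.yml -o weights=output/tinypose_256x192/model_final
+```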