# config yaml guide

KeyPoint config guide, taking [tinypose_256x192.yml](../../configs/keypoint/tiny_pose/tinypose_256x192.yml) as an example.

```yaml
use_gpu: true                                                                                  #train with gpu or not

log_iter: 5                                                                                    #print log every 5 iter

save_dir: output                                                                               #the directory to save model

snapshot_epoch: 10                                                                             #save model every 10 epochs

weights: output/tinypose_256x192/model_final                                                   #the weights to load (without the ".pdparams" suffix)

epoch: 420                                                                                     #the total epoch number to train

num_joints: &num_joints 17                                                                     #number of joints

pixel_std: &pixel_std 200                                                                      #the standard pixel length (kept for compatibility, can be ignored)

metric: KeyPointTopDownCOCOEval                                                                #metric function

num_classes: 1                                                                                 #number of classes (only used for object detection, can be ignored here)

train_height: &train_height 256                                                                #the height of model input

train_width: &train_width 192                                                                  #the width of model input

trainsize: &trainsize [*train_width, *train_height]                                            #the shape of model input
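#note: "&name" defines a YAML anchor and "*name" reuses its value, so settings such as train_width and train_height are declared once and referenced throughout this file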

hmsize: &hmsize [48, 64]                                                                       #the shape of model output
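#note: the heatmap size here is the input size divided by 4: [192/4, 256/4] = [48, 64], in [width, height] order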

flip_perm: &flip_perm [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]  #the left/right keypoint id pairs that are swapped when the image is flipped, e.g. the left wrist becomes the right wrist and vice versa
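#example (assuming the standard COCO 17-keypoint order): pair [1, 2] swaps left_eye/right_eye, [5, 6] swaps left_shoulder/right_shoulder, [9, 10] swaps left_wrist/right_wrist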





#####model

architecture: TopDownHRNet                                                                     #the model architecture



TopDownHRNet:                                                                                  #TopDownHRNet configs

  backbone: LiteHRNet                                                                          #which backbone to use

  post_process: HRNetPostProcess                                                               #the post_process to use

  flip_perm: *flip_perm                                                                        #same as "flip_perm" defined above

  num_joints: *num_joints                                                                      #the number of joints (also the number of output channels)

  width: &width 40                                                                             #backbone output channels

  loss: KeyPointMSELoss                                                                        #loss function

  use_dark: true                                                                               #whether to use DarkPose in post-processing
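#note: DarkPose refers to distribution-aware heatmap decoding, which refines the decoded keypoint coordinates to sub-pixel precision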



LiteHRNet:                                                                                     #LiteHRNet configs

  network_type: wider_naive                                                                    #the network type of backbone

  freeze_at: -1                                                                                #the branch with this id is frozen (no backward); -1 means all branches are trained

  freeze_norm: false                                                                           #whether to freeze the normalization-layer weights

  return_idx: [0]                                                                              #the branch id to fetch features



KeyPointMSELoss:                                                                               #Loss configs

  use_target_weight: true                                                                      #whether to use target weights

  loss_scale: 1.0                                                                              #loss weight, final_loss = loss * loss_scale



#####optimizer

LearningRate:                                                                                  #LearningRate configs

  base_lr: 0.002                                                                               #the original base learning rate
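  #worked example, using the schedulers below: the lr warms up from 0.002 * 0.001 = 2e-6 to 0.002 over the first 500 iterations, stays at 0.002 until epoch 380, then drops to 0.002 * 0.1 = 2e-4 at epoch 380 and to 2e-5 at epoch 410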

  schedulers:

  - !PiecewiseDecay                                                                            #the scheduler used to adjust the learning rate

    milestones: [380, 410]                                                                     #the milestones (epochs) at which the learning rate is adjusted

    gamma: 0.1                                                                                 #the ratio used to adjust the learning rate, new_lr = lr * gamma

  - !LinearWarmup                                                                              #Warmup configs

    start_factor: 0.001                                                                        #the initial ratio with respect to base_lr

    steps: 500                                                                                 #the number of iterations used for warmup



OptimizerBuilder:                                                                              #Optimizer type configs

  optimizer:

    type: Adam                                                                                 #optimizer type: Adam

  regularizer:

    factor: 0.0                                                                                #the regularizer weight

    type: L2                                                                                   #regularizer type: L2/L1





#####data

TrainDataset:                                                                                  #Train Dataset configs
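#note: the annotation file referenced below is expected to follow the COCO keypoint format, where each annotation carries a "keypoints" list of num_joints * 3 values (x, y, visibility) plus "bbox" and "num_keypoints" fields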

  !KeypointTopDownCocoDataset                                                                  #the dataset class to load data

    image_dir: ""                                                                              #the image directory, relative to dataset_dir

    anno_path: aic_coco_train_cocoformat.json                                                  #the training annotation file, COCO format, relative to dataset_dir

    dataset_dir: dataset                                                                       #the dataset root directory; image_dir and anno_path are relative to it

    num_joints: *num_joints                                                                    #number of joints

    trainsize: *trainsize                                                                      #the input size of the model

    pixel_std: *pixel_std                                                                      #same as "pixel_std" above

    use_gt_bbox: True                                                                          #whether to use gt bboxes, commonly used in eval





EvalDataset:                                                                                   #Eval Dataset configs

  !KeypointTopDownCocoDataset                                                                  #the dataset class to load data

    image_dir: val2017                                                                         #the image directory, relative to dataset_dir

    anno_path: annotations/person_keypoints_val2017.json                                       #the eval annotation file, COCO format, relative to dataset_dir

    dataset_dir: dataset/coco                                                                  #the dataset root directory; image_dir and anno_path are relative to it

    num_joints: *num_joints                                                                    #number of joints

    trainsize: *trainsize                                                                      #the input size of the model

    pixel_std: *pixel_std                                                                      #same as "pixel_std" above

    use_gt_bbox: True                                                                          #whether to use gt bboxes, commonly used in eval

    image_thre: 0.5                                                                            #the score threshold for detected boxes, used when use_gt_bbox is False



TestDataset:                                                                                   #the test dataset without label
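#note: the anno_path below is assumed to be a plain-text list with one image path per line; replace it with your own list when running inference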

  !ImageFolder                                                                                 #the dataset class; it finds images from a folder or list file

    anno_path: dataset/coco/keypoint_imagelist.txt                                             #the image list file



worker_num: 2                                                                                  #the number of worker processes used to load data

global_mean: &global_mean [0.485, 0.456, 0.406]                                                #means used to normalize image

global_std: &global_std [0.229, 0.224, 0.225]                                                  #stds used to normalize image

TrainReader:                                                                                   #TrainReader configs

  sample_transforms:                                                                           #transform configs

    - RandomFlipHalfBodyTransform:                                                             #random flip & random HalfBodyTransform

        scale: 0.25                                                                            #the maximum scale for the size transform

        rot: 30                                                                                #the maximum rotation angle for the transform

        num_joints_half_body: 8                                                                #HalfBodyTransform is skipped when fewer joints than this are found

        prob_half_body: 0.3                                                                    #the probability of applying the half-body transform

        pixel_std: *pixel_std                                                                  #same as "pixel_std" above

        trainsize: *trainsize                                                                  #the input size of the model

        upper_body_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]                                     #the joint ids that belong to the upper body

        flip_pairs: *flip_perm                                                                 #same as "flip_perm" above

    - AugmentationbyInformantionDropping:

        prob_cutout: 0.5                                                                       #the probability of cutting out a keypoint region

        offset_factor: 0.05                                                                    #the jitter offset of the cutout position, as a fraction of the input width

        num_patch: 1                                                                           #the number of areas to cut out

        trainsize: *trainsize                                                                  #same as "trainsize" above

    - TopDownAffine:

        trainsize: *trainsize                                                                  #same as "trainsize" above

        use_udp: true                                                                          #whether to use udp unbiased data processing (just for flip eval)

    - ToHeatmapsTopDown_DARK:                                                                  #generate gt heatmaps

        hmsize: *hmsize                                                                        #the size of the output heatmaps

        sigma: 2                                                                               #the sigma of the Gaussian kernel used to generate gt heatmaps

  batch_transforms:

    - NormalizeImage:                                                                          #image normalize class

        mean: *global_mean                                                                     #mean used for normalization

        std: *global_std                                                                       #std used for normalization

        is_scale: true                                                                         #whether to scale every pixel by 1/255, mapping values from [0, 255] to [0, 1]

    - Permute: {}                                                                              #channel transform from HWC to CHW

  batch_size: 128                                                                              #batch size used for training

  shuffle: true                                                                                #whether to shuffle the images before training

  drop_last: false                                                                             #whether to drop the last incomplete batch
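  #worked example for the NormalizeImage transform above: an R-channel pixel of 128 is first scaled to 128 / 255 ≈ 0.502, then normalized to (0.502 - 0.485) / 0.229 ≈ 0.074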



EvalReader:

  sample_transforms:                                                                           #transform configs

    - TopDownAffine:                                                                           #Affine configs

        trainsize: *trainsize                                                                  #same as "trainsize" above

        use_udp: true                                                                          #whether to use udp unbiased data processing (just for flip eval)

  batch_transforms:

    - NormalizeImage:                                                                          #image normalization; the values should match those in TrainReader

        mean: *global_mean

        std: *global_std

        is_scale: true

    - Permute: {}                                                                              #channel transform from HWC to CHW

  batch_size: 16                                                                               #batch size used for evaluation



TestReader:

  inputs_def:

    image_shape: [3, *train_height, *train_width]                                              #the input dimensions used by the model, in CHW order

  sample_transforms:

    - Decode: {}                                                                               #load image

    - TopDownEvalAffine:                                                                       #Affine class used in eval

        trainsize: *trainsize                                                                  #the input size of the model

    - NormalizeImage:                                                                          #image normalization; the values should match those in TrainReader

        mean: *global_mean                                                                     #mean used for normalization

        std: *global_std                                                                       #std used for normalization

        is_scale: true                                                                         #whether to scale every pixel by 1/255, mapping values from [0, 255] to [0, 1]

    - Permute: {}                                                                              #channel transform from HWC to CHW

  batch_size: 1                                                                                #test batch size

  fuse_normalize: false                                                                        #whether to fuse the normalization into the model at export time; this speeds up inference
```
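
To adapt this config to a custom COCO-format keypoint dataset, it is usually enough to repoint the dataset sections. The fragment below is a minimal sketch: `dataset/my_keypoint_dataset`, `annotations/train.json` and `annotations/val.json` are placeholder paths, not files shipped with the repo, and the aliases (`*num_joints`, `*trainsize`, `*pixel_std`) refer to the anchors defined at the top of the full config.

```yaml
TrainDataset:
  !KeypointTopDownCocoDataset
    image_dir: images                              #hypothetical image folder under dataset_dir
    anno_path: annotations/train.json              #hypothetical COCO-format training annotations
    dataset_dir: dataset/my_keypoint_dataset       #hypothetical dataset root
    num_joints: *num_joints
    trainsize: *trainsize
    pixel_std: *pixel_std
    use_gt_bbox: True

EvalDataset:
  !KeypointTopDownCocoDataset
    image_dir: images
    anno_path: annotations/val.json                #hypothetical COCO-format eval annotations
    dataset_dir: dataset/my_keypoint_dataset
    num_joints: *num_joints
    trainsize: *trainsize
    pixel_std: *pixel_std
    use_gt_bbox: True
```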