# metro3d_24kpts.yml
# Run-level settings.
use_gpu: true
log_iter: 20
save_dir: output
snapshot_epoch: 3
# NOTE(review): this path uses the directory name `metro_modified` — confirm it
# is where the checkpoint for this config is actually written.
weights: output/metro_modified/model_final
epoch: 50
metric: Pose3DEval
num_classes: 1

# Shared values, anchored so later sections can alias them instead of
# repeating the literals.
train_height: &train_height 224
train_width: &train_width 224
# Width first, then height.
trainsize: &trainsize
  - *train_width
  - *train_height
num_joints: &num_joints 24

#####model
# Name of the top-level architecture; its components are listed in the
# METRO_Body mapping below.
architecture: METRO_Body
# NOTE(review): judging by the file name this is a truncated HRNet-W32
# classification checkpoint — confirm it matches the HRNet settings used here.
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/Trunc_HRNet_W32_C_pretrained.pdparams

# Each value names another top-level section of this file (HRNet,
# TransEncoder, Pose3DLoss); num_joints aliases the shared anchor.
METRO_Body:
  backbone: HRNet
  trans_encoder: TransEncoder
  num_joints: *num_joints
  loss: Pose3DLoss

# Options for the HRNet section named by METRO_Body.backbone.
HRNet:
  width: 32
  # NOTE(review): -1 presumably means "freeze no stage" — confirm in HRNet.
  freeze_at: -1
  freeze_norm: false
  norm_momentum: 0.1
  downsample: true

# Options for the TransEncoder section named by METRO_Body.trans_encoder.
TransEncoder:
  vocab_size: 30522
  num_hidden_layers: 4
  num_attention_heads: 4
  position_embeddings_size: 512
  intermediate_size: 3072
  # Both lists have three entries; presumably one per encoder stage, paired
  # by position — confirm against the TransEncoder implementation.
  input_feat_dim: [2048, 512, 128]
  hidden_feat_dim: [1024, 256, 128]
  attention_probs_dropout_prob: 0.1
  fc_dropout_prob: 0.1
  act_fn: 'gelu'
  output_attentions: false
  output_hidden_feats: false

# Weights for the Pose3DLoss section named by METRO_Body.loss.
Pose3DLoss:
  weight_3d: 1.0
  # NOTE(review): 0.0 presumably switches the 2D term off — confirm in
  # Pose3DLoss.
  weight_2d: 0.0

#####optimizer
# Base learning rate plus the list of schedulers applied to it.
LearningRate:
  base_lr: 0.0001
  schedulers:
    # NOTE(review): max_epochs (52) is larger than the top-level `epoch: 50`;
    # presumably deliberate so the cosine curve has not fully decayed by the
    # final epoch — confirm.
    - !CosineDecay
      max_epochs: 52
    - !LinearWarmup
      start_factor: 0.01
      steps: 2000


# Optimizer construction settings: gradient clipping, optimizer type and
# regularizer.
OptimizerBuilder:
  clip_grad_by_norm: 0.2
  optimizer:
    type: Adam
  regularizer:
    # NOTE(review): an L2 regularizer with factor 0.0 — presumably equivalent
    # to no weight decay; confirm.
    factor: 0.0
    type: L2


#####data
TrainDataset:
  !Pose3DDataset
    dataset_dir: dataset/traindata/
    # image_dirs and anno_list both have six entries; presumably entry i of
    # one pairs with entry i of the other ("hr-lspet" appears twice, next to
    # the LSPet train and test annotation files) — confirm in Pose3DDataset.
    image_dirs:
      - "human3.6m"
      - "posetrack3d"
      - "hr-lspet"
      - "hr-lspet"
      - "mpii/images"
      - "coco/train2017"
    anno_list:
      - "pose3d/Human3.6m_train.json"
      - "pose3d/PoseTrack_ver01.json"
      - "pose3d/LSPet_train_ver10.json"
      - "pose3d/LSPet_test_ver10.json"
      - "pose3d/MPII_ver01.json"
      - "pose3d/COCO2014-All-ver01.json"
    num_joints: *num_joints
    test_mode: false

# Evaluation data: a single image directory / annotation file pair, loaded
# with test_mode enabled.
EvalDataset:
  !Pose3DDataset
    dataset_dir: dataset/traindata/
    image_dirs:
      - "human3.6m"
    anno_list:
      - "pose3d/Human3.6m_valid.json"
    num_joints: *num_joints
    test_mode: true

# Inference data: an ImageFolder node whose anno_path points at a text file
# under the COCO data directory.
TestDataset:
  !ImageFolder
    anno_path: dataset/traindata/coco/keypoint_imagelist.txt

worker_num: 4
# Per-channel normalization constants, anchored so the NormalizeImage step of
# each reader below can alias them. The values coincide with the widely used
# ImageNet statistics — presumably in RGB order; confirm.
global_mean: &global_mean [0.485, 0.456, 0.406]
global_std: &global_std [0.229, 0.224, 0.225]
# Training reader: per-sample augmentation (affine, flip, noise) followed by
# batch-level normalization and Permute.
TrainReader:
  sample_transforms:
    - SinglePoseAffine:
        trainsize: *trainsize
        # Each pair is [probability, range], per the inline annotations.
        rotate: [1.0, 30] #[prob, rotate range]
        scale: [1.0, 0.25] #[prob, scale range]
    - FlipPose:
        flip_prob: 0.5
        # Aliases train_width only; train_height has the same value (224).
        img_res: *train_width
        num_joints: *num_joints
    - NoiseJitter:
        noise_factor: 0.4
  batch_transforms:
    - NormalizeImage:
        mean: *global_mean
        std: *global_std
        is_scale: true
    - Permute: {}
  batch_size: 64
  shuffle: true
  drop_last: true

# Evaluation reader: same affine transform and normalization as training,
# without FlipPose/NoiseJitter, shuffling or drop_last.
EvalReader:
  sample_transforms:
    - SinglePoseAffine:
        trainsize: *trainsize
        # First element of each pair is 0.0 here, versus 1.0 in TrainReader,
        # where the pairs are annotated as [prob, range].
        rotate: [0.0, 30]
        scale: [0.0, 0.25]
  batch_transforms:
    - NormalizeImage:
        mean: *global_mean
        std: *global_std
        is_scale: true
    - Permute: {}
  batch_size: 16
  shuffle: false
  drop_last: false

# Inference reader: all steps (decode, affine, normalize, permute) are listed
# as sample_transforms; batch size is 1.
TestReader:
  inputs_def:
    # [3, height, width], built from the shared size anchors.
    image_shape: [3, *train_height, *train_width]
  sample_transforms:
    - Decode: {}
    - TopDownEvalAffine:
        trainsize: *trainsize
    - NormalizeImage:
        mean: *global_mean
        std: *global_std
        is_scale: true
    - Permute: {}
  batch_size: 1
  fuse_normalize: false # whether to fuse the normalize layer into the model when exporting it