diff --git a/configs/pose3d/tinypose3d_human36M.yml b/configs/pose3d/tinypose3d_human36M.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a3ccdbbbd588013cb87418a269f0abc9dde17dea
--- /dev/null
+++ b/configs/pose3d/tinypose3d_human36M.yml
@@ -0,0 +1,123 @@
+use_gpu: true
+log_iter: 5
+save_dir: output
+snapshot_epoch: 1
+weights: output/tinypose3d_human36M/model_final
+epoch: 220
+num_joints: &num_joints 24
+pixel_std: &pixel_std 200
+metric: Pose3DEval
+num_classes: 1
+train_height: &train_height 128
+train_width: &train_width 128
+trainsize: &trainsize [*train_width, *train_height]
+
+#####model
+architecture: TinyPose3DHRNet
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/keypoint/tinypose_128x96.pdparams
+
+TinyPose3DHRNet:
+  backbone: LiteHRNet
+  post_process: HR3DNetPostProcess
+  fc_channel: 1024
+  num_joints: *num_joints
+  width: &width 40
+  loss: Pose3DLoss
+
+LiteHRNet:
+  network_type: wider_naive
+  freeze_at: -1
+  freeze_norm: false
+  return_idx: [0]
+
+Pose3DLoss:
+  weight_3d: 1.0
+  weight_2d: 0.0
+
+#####optimizer
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    milestones: [17, 21]
+    gamma: 0.1
+  - !LinearWarmup
+    start_factor: 0.01
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    type: Adam
+  regularizer:
+    factor: 0.0
+    type: L2
+
+
+#####data
+TrainDataset:
+  !Pose3DDataset
+    dataset_dir: Human3.6M
+    image_dirs: ["Images"]
+    anno_list: ['Human3.6m_train.json']
+    num_joints: *num_joints
+    test_mode: False
+
+EvalDataset:
+  !Pose3DDataset
+    dataset_dir: Human3.6M
+    image_dirs: ["Images"]
+    anno_list: ['Human3.6m_valid.json']
+    num_joints: *num_joints
+    test_mode: True
+
+TestDataset:
+  !ImageFolder
+    anno_path: dataset/coco/keypoint_imagelist.txt
+
+worker_num: 4
+global_mean: &global_mean [0.485, 0.456, 0.406]
+global_std: &global_std [0.229, 0.224, 0.225]
+TrainReader:
+  sample_transforms:
+    - SinglePoseAffine:
+        trainsize: *trainsize
+        rotate: [0.5, 30] #[prob, rotate range]
+        scale: [0.5, 0.25] #[prob, scale range]
+  batch_transforms:
+    - NormalizeImage:
+        mean: *global_mean
+        std: *global_std
+        is_scale: true
+    - Permute: {}
+  batch_size: 128
+  shuffle: true
+  drop_last: true
+
+EvalReader:
+  sample_transforms:
+    - SinglePoseAffine:
+        trainsize: *trainsize
+        rotate: [0., 30]
+        scale: [0., 0.25]
+  batch_transforms:
+    - NormalizeImage:
+        mean: *global_mean
+        std: *global_std
+        is_scale: true
+    - Permute: {}
+  batch_size: 128
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *train_height, *train_width]
+  sample_transforms:
+    - Decode: {}
+    - TopDownEvalAffine:
+        trainsize: *trainsize
+    - NormalizeImage:
+        mean: *global_mean
+        std: *global_std
+        is_scale: true
+    - Permute: {}
+  batch_size: 1
+  fuse_normalize: false
diff --git a/ppdet/modeling/architectures/keypoint_hrnet.py b/ppdet/modeling/architectures/keypoint_hrnet.py
index fa3541d7d783b70fab8eb28dbdd8914b7394f6b4..1d93e3af5f5d4e4b0be173dd64ea37f01f7b31be 100644
--- a/ppdet/modeling/architectures/keypoint_hrnet.py
+++ b/ppdet/modeling/architectures/keypoint_hrnet.py
@@ -394,6 +394,7 @@ class TinyPose3DHRNet(BaseArch):
     def __init__(self,
                  width,
                  num_joints,
+                 fc_channel=768,
                  backbone='HRNet',
                  loss='KeyPointRegressionMSELoss',
                  post_process=TinyPose3DPostProcess):
@@ -411,21 +412,13 @@ class TinyPose3DHRNet(BaseArch):
 
         self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
 
-        self.final_conv_new = L.Conv2d(
-            width, num_joints * 32, 1, 1, 0, bias=True)
-
         self.flatten = paddle.nn.Flatten(start_axis=2, stop_axis=3)
-        self.fc1 = paddle.nn.Linear(768, 256)
+        self.fc1 = paddle.nn.Linear(fc_channel, 256)
         self.act1 = paddle.nn.ReLU()
         self.fc2 = paddle.nn.Linear(256, 64)
         self.act2 = paddle.nn.ReLU()
         self.fc3 = paddle.nn.Linear(64, 3)
 
-        # for human3.6M
-        self.fc1_1 = paddle.nn.Linear(3136, 1024)
-        self.fc2_1 = paddle.nn.Linear(1024, 256)
-        self.fc3_1 = paddle.nn.Linear(256, 3)
-
     @classmethod
     def from_config(cls, cfg, *args, **kwargs):
         # backbone
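Note on the new fc_channel parameter: a minimal sketch (not part of this patch) of where the configured values plausibly come from, assuming the backbone's output feature map is the train size downsampled by a stride of 4, so that fc1 consumes the flattened per-joint feature map. The helper fc_channel_for is hypothetical, introduced here only for illustration.

    def fc_channel_for(train_height, train_width, stride=4):
        # Flattened per-joint feature length fed into fc1:
        # (H // stride) * (W // stride) for a backbone with the given
        # output stride (stride=4 is an assumption, not taken from the patch).
        return (train_height // stride) * (train_width // stride)

    # The previously hardcoded 768 matches the 128x96 input of the
    # tinypose_128x96 pretrain weights, while 1024 matches the 128x128
    # trainsize set in the new yml above.
    assert fc_channel_for(128, 96) == 768
    assert fc_channel_for(128, 128) == 1024

Replacing the hardcoded Linear(768, 256) with Linear(fc_channel, 256), and dropping the unused final_conv_new and fc*_1 branches, lets one class serve both input resolutions, with the per-dataset value supplied from the config (fc_channel: 1024 in tinypose3d_human36M.yml).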