diff --git a/configs/pose3d/README.md b/configs/pose3d/README.md
index b5da08f1cf8c450f278bbb62f15ae6a854b3a9fc..0b9dec7e9c58740bede2de59a5163fa59528094b 100644
--- a/configs/pose3d/README.md
+++ b/configs/pose3d/README.md
@@ -24,12 +24,12 @@
 PaddleDetection provides two 3D pose algorithms (sparse keypoints): Metro3D, a large model intended for server-side deployment, and TinyPose3D, intended for mobile deployment. Metro3D is a sparsified adaptation of [End-to-End Human Pose and Mesh Reconstruction with Transformers](https://arxiv.org/abs/2012.09760), while TinyPose3D extends TinyPose to output 3D keypoints.
 
-## Model Recommendation (to be added)
+## Model Recommendation
 
-|Model|Scenario|Human3.6M accuracy|Download|
-|:--:|:--:|:--:|:--:|
-|Metro3D|Server-side|-|-|
-|TinyPose3D|Mobile|-|-|
+|Model|Scenario|Human3.6M accuracy (14 keypoints)|Human3.6M accuracy (17 keypoints)|Download|
+|:--:|:--:|:--:|:--:|:--:|
+|Metro3D|Server-side|56.014|46.619|[metro3d_24kpts.pdparams](https://bj.bcebos.com/v1/paddledet/models/pose3d/metro3d_24kpts.pdparams)|
+|TinyPose3D|Mobile|86.381|71.223|[tinypose3d_human36m.pdparams](https://bj.bcebos.com/v1/paddledet/models/pose3d/tinypose3d_human36M.pdparams)|
 
 Notes:
 1. The training data is based on the training data used in [MeshTransformer](https://github.com/microsoft/MeshTransformer).
@@ -137,13 +137,14 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/infer.py -c configs/pose3d/metro3d_24kpts.y
 Our training data contains a large amount of automatically generated, lower-precision samples. Users can start from a model trained on this data, annotate a small amount of high-precision data for their target actions, and fine-tune on it to obtain a reasonably stable and accurate model.
 
-  Training results on high-precision medical rehabilitation data are shown below
+  Training results on high-precision medical rehabilitation data are shown below [HD video](https://user-images.githubusercontent.com/31800336/218949226-22e6ab25-facb-4cc6-8eca-38d4bfd973e5.mp4)
 
 <div align="center">
-
+
 </div>
+
 ## Citation
 ```
diff --git a/configs/pose3d/tinypose3d_human36M.yml b/configs/pose3d/tinypose3d_human36M.yml
index a3ccdbbbd588013cb87418a269f0abc9dde17dea..05c6656d145a7bb4af14bcc0a1781cf54de552b1 100644
--- a/configs/pose3d/tinypose3d_human36M.yml
+++ b/configs/pose3d/tinypose3d_human36M.yml
@@ -13,13 +13,12 @@ train_width: &train_width 128
 trainsize: &trainsize [*train_width, *train_height]
 
 #####model
-architecture: TinyPose3DHRNet
+architecture: TinyPose3DHRHeatmapNet
 pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/keypoint/tinypose_128x96.pdparams
 
-TinyPose3DHRNet:
+TinyPose3DHRHeatmapNet:
   backbone: LiteHRNet
   post_process: HR3DNetPostProcess
-  fc_channel: 1024
   num_joints: *num_joints
   width: &width 40
   loss: Pose3DLoss
@@ -56,17 +55,17 @@ OptimizerBuilder:
 
 #####data
 TrainDataset:
   !Pose3DDataset
-    dataset_dir: Human3.6M
-    image_dirs: ["Images"]
-    anno_list: ['Human3.6m_train.json']
+    dataset_dir: dataset/traindata/
+    image_dirs: ["human3.6m"]
+    anno_list: ['pose3d/Human3.6m_train.json']
     num_joints: *num_joints
     test_mode: False
 
 EvalDataset:
   !Pose3DDataset
-    dataset_dir: Human3.6M
-    image_dirs: ["Images"]
-    anno_list: ['Human3.6m_valid.json']
+    dataset_dir: dataset/traindata/
+    image_dirs: ["human3.6m"]
+    anno_list: ['pose3d/Human3.6m_valid.json']
     num_joints: *num_joints
     test_mode: True
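For context on the `TrainDataset`/`EvalDataset` change above: the config now points at a shared `dataset/traindata/` root, with the Human3.6M images under `human3.6m/` and the 3D annotation lists under `pose3d/`. Below is a minimal sketch of a path sanity check under that layout; exactly how `Pose3DDataset` joins `dataset_dir`, `image_dirs`, and `anno_list` is an assumption here, not taken from the loader code.

```python
# Sanity-check sketch for the paths referenced by tinypose3d_human36M.yml.
# Assumption: anno_list entries are resolved relative to dataset_dir and
# image_dirs are sub-folders of dataset_dir.
import os

dataset_dir = "dataset/traindata"
expected = [
    os.path.join(dataset_dir, "human3.6m"),                       # image_dirs[0]
    os.path.join(dataset_dir, "pose3d", "Human3.6m_train.json"),  # TrainDataset anno_list[0]
    os.path.join(dataset_dir, "pose3d", "Human3.6m_valid.json"),  # EvalDataset anno_list[0]
]

for path in expected:
    print(("OK   " if os.path.exists(path) else "MISS ") + path)
```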
diff --git a/ppdet/data/source/pose3d_cmb.py b/ppdet/data/source/pose3d_cmb.py
index 3c465a325d6f96d9792348689652332dbda272a1..06dbdd9e9abaf597112ea905c5d6e708caa3b132 100644
--- a/ppdet/data/source/pose3d_cmb.py
+++ b/ppdet/data/source/pose3d_cmb.py
@@ -11,9 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
-this code is base on https://github.com/open-mmlab/mmpose
-"""
+
 import os
 import cv2
 import numpy as np
@@ -80,7 +78,7 @@ class Pose3DDataset(DetDataset):
         mjm_mask[indices, :] = 0.0
         # return mjm_mask
 
-        num_joints = 1
+        num_joints = 10
         mvm_mask = np.ones((num_joints, 1)).astype(np.float)
         if self.test_mode == False:
             num_vertices = num_joints
diff --git a/ppdet/metrics/pose3d_metrics.py b/ppdet/metrics/pose3d_metrics.py
index 32e1deb615b3f26809ca815d68c5a1b87e0a1066..ea21de90b07e8883b7e5c4717b995527331b48d6 100644
--- a/ppdet/metrics/pose3d_metrics.py
+++ b/ppdet/metrics/pose3d_metrics.py
@@ -137,11 +137,6 @@ def all_gather(data):
 
 
 class Pose3DEval(object):
-    """refer to
-        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
-        Copyright (c) Microsoft, under the MIT License.
-    """
-
     def __init__(self, output_eval, save_prediction_only=False):
         super(Pose3DEval, self).__init__()
         self.output_eval = output_eval
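The README table above reports Human3.6M accuracy over 14- and 17-keypoint subsets; numbers of this kind are usually a mean per-joint position error over root-aligned 3D keypoints. Below is a minimal NumPy sketch of that idea, assuming simple root alignment; the exact protocol implemented by `Pose3DEval` (alignment method, joint subset, units) is not reproduced here.

```python
# Hedged sketch of MPJPE (mean per-joint position error) on root-aligned
# 3D keypoints; the alignment and joint selection are illustrative only.
import numpy as np


def mpjpe(pred, gt, root_index=0):
    """pred, gt: [num_samples, num_joints, 3] arrays in the same units."""
    # Center both poses on the root joint so global translation does not
    # dominate the error.
    pred = pred - pred[:, root_index:root_index + 1, :]
    gt = gt - gt[:, root_index:root_index + 1, :]
    # Euclidean distance per joint, averaged over joints and samples.
    return np.linalg.norm(pred - gt, axis=-1).mean()


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    gt = rng.normal(size=(4, 17, 3))
    pred = gt + 0.01 * rng.normal(size=gt.shape)
    print(mpjpe(pred, gt))
```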
diff --git a/ppdet/modeling/architectures/keypoint_hrnet.py b/ppdet/modeling/architectures/keypoint_hrnet.py
index 1d93e3af5f5d4e4b0be173dd64ea37f01f7b31be..8d50502e71143dfc3dbfc28f7c9bfec912a832d0 100644
--- a/ppdet/modeling/architectures/keypoint_hrnet.py
+++ b/ppdet/modeling/architectures/keypoint_hrnet.py
@@ -46,7 +46,7 @@ class TopDownHRNet(BaseArch):
                  use_dark=True):
         """
         HRNet network, see https://arxiv.org/abs/1902.09212
-
+
         Args:
             backbone (nn.Layer): backbone instance
             post_process (object): `HRNetPostProcess` instance
@@ -132,10 +132,10 @@
     def get_max_preds(self, heatmaps):
         '''get predictions from score maps
-
+
         Args:
             heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
-
+
         Returns:
             preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
             maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
@@ -220,12 +220,12 @@
     def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
         """the highest heatvalue location with a quarter offset in the
         direction from the highest response to the second highest response.
-
+
         Args:
             heatmaps (numpy.ndarray): The predicted heatmaps
             center (numpy.ndarray): The boxes center
             scale (numpy.ndarray): The scale factor
-
+
         Returns:
             preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
             maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
@@ -341,10 +341,7 @@ class TinyPose3DHRHeatmapNet(BaseArch):
         self.deploy = False
         self.num_joints = num_joints
 
-        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
-        # for heatmap output
-        self.final_conv_new = L.Conv2d(
-            width, num_joints * 32, 1, 1, 0, bias=True)
+        self.final_conv = L.Conv2d(width, num_joints * 32, 1, 1, 0, bias=True)
 
     @classmethod
     def from_config(cls, cfg, *args, **kwargs):
@@ -356,20 +353,19 @@ class TinyPose3DHRHeatmapNet(BaseArch):
     def _forward(self):
         feats = self.backbone(self.inputs)  # feats:[[batch_size, 40, 32, 24]]
 
-        hrnet_outputs = self.final_conv_new(feats[0])
+        hrnet_outputs = self.final_conv(feats[0])
         res = soft_argmax(hrnet_outputs, self.num_joints)
-
-        if self.training:
-            return self.loss(res, self.inputs)
-        else:  # export model need
-            return res
+        return res
 
     def get_loss(self):
-        return self._forward()
+        pose3d = self._forward()
+        loss = self.loss(pose3d, None, self.inputs)
+        outputs = {'loss': loss}
+        return outputs
 
     def get_pred(self):
         res_lst = self._forward()
-        outputs = {'keypoint': res_lst}
+        outputs = {'pose3d': res_lst}
         return outputs
 
     def flip_back(self, output_flipped, matched_parts):
@@ -427,16 +423,23 @@
         return {'backbone': backbone, }
 
     def _forward(self):
-        feats = self.backbone(self.inputs)  # feats:[[batch_size, 40, 32, 24]]
+        '''
+        self.inputs is a dict
+        '''
+        feats = self.backbone(
+            self.inputs)  # feats: [[batch_size, 40, width/4, height/4]]
+
+        hrnet_outputs = self.final_conv(
+            feats[0])  # hrnet_outputs: [batch_size, num_joints*32, 32, 32]
 
-        hrnet_outputs = self.final_conv(feats[0])
         flatten_res = self.flatten(
-            hrnet_outputs)  # [batch_size, 24, (height/4)*(width/4)]
+            hrnet_outputs)  # [batch_size, num_joints*32, 32*32]
+
         res = self.fc1(flatten_res)
         res = self.act1(res)
         res = self.fc2(res)
         res = self.act2(res)
-        res = self.fc3(res)  # [batch_size, 24, 3]
+        res = self.fc3(res)
 
         if self.training:
             return self.loss(res, self.inputs)
@@ -448,7 +451,7 @@
 
     def get_pred(self):
         res_lst = self._forward()
-        outputs = {'keypoint': res_lst}
+        outputs = {'pose3d': res_lst}
         return outputs
 
     def flip_back(self, output_flipped, matched_parts):
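The `TinyPose3DHRHeatmapNet` changes above make `final_conv` emit `num_joints * 32` channels, and `soft_argmax` reduces that stack to per-joint 3D coordinates by treating each joint's 32 channels as a depth axis over the spatial heatmap. Below is a minimal NumPy sketch of such a 3D soft-argmax; the channel layout and the lack of output normalization are assumptions, not a copy of ppdet's `soft_argmax`.

```python
# Hedged sketch of 3D soft-argmax decoding for a [N, num_joints*depth, H, W]
# heatmap; channel ordering and coordinate scaling are assumptions.
import numpy as np


def soft_argmax_3d(heatmaps, num_joints, depth=32):
    n, c, h, w = heatmaps.shape
    assert c == num_joints * depth
    vol = heatmaps.reshape(n, num_joints, depth, h, w)
    # Softmax over each joint's whole (depth, h, w) volume.
    flat = vol.reshape(n, num_joints, -1)
    flat = np.exp(flat - flat.max(axis=-1, keepdims=True))
    flat /= flat.sum(axis=-1, keepdims=True)
    prob = flat.reshape(n, num_joints, depth, h, w)
    # Expected coordinate along each axis = sum(index * probability).
    zs = np.arange(depth).reshape(1, 1, depth, 1, 1)
    ys = np.arange(h).reshape(1, 1, 1, h, 1)
    xs = np.arange(w).reshape(1, 1, 1, 1, w)
    x = (prob * xs).sum(axis=(2, 3, 4))
    y = (prob * ys).sum(axis=(2, 3, 4))
    z = (prob * zs).sum(axis=(2, 3, 4))
    return np.stack([x, y, z], axis=-1)  # [N, num_joints, 3]


if __name__ == "__main__":
    # Spatial size 32x24 mirrors the feats comment above; values are random.
    hm = np.random.rand(2, 24 * 32, 32, 24).astype(np.float32)
    print(soft_argmax_3d(hm, num_joints=24).shape)  # (2, 24, 3)
```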
diff --git a/ppdet/modeling/architectures/pose3d_metro.py b/ppdet/modeling/architectures/pose3d_metro.py
index b56280981ef7af0dc2cd877462941e6d09a7c3d8..4275154d137ccd838ee36cbe2f09c520d0ea3d2b 100644
--- a/ppdet/modeling/architectures/pose3d_metro.py
+++ b/ppdet/modeling/architectures/pose3d_metro.py
@@ -53,7 +53,7 @@ class METRO_Body(BaseArch):
                  trans_encoder='',
                  loss='Pose3DLoss', ):
         """
-        METRO network, see https://arxiv.org/abs/
+        Modified from the METRO network, see https://arxiv.org/abs/2012.09760
 
         Args:
             backbone (nn.Layer): backbone instance
@@ -65,7 +65,7 @@
         self.deploy = False
         self.trans_encoder = trans_encoder
 
-        self.conv_learn_tokens = paddle.nn.Conv1D(49, num_joints + 1, 1)
+        self.conv_learn_tokens = paddle.nn.Conv1D(49, num_joints + 10, 1)
         self.cam_param_fc = paddle.nn.Linear(3, 2)
 
     @classmethod
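In `METRO_Body`, `conv_learn_tokens` is a kernel-size-1 `Conv1D` whose input channels are the 49 backbone grid positions (for example a 7x7 feature map), so the change from `num_joints + 1` to `num_joints + 10` simply allocates more learned output tokens for the transformer encoder. Functionally the layer is a learned linear mixing of grid features into tokens; below is a small NumPy sketch of that equivalence, with the feature dimension and weight values as stand-ins rather than values from the model.

```python
# Hedged sketch: a kernel-size-1 Conv1D over 49 grid tokens is equivalent to
# a learned linear combination of those tokens, applied per feature channel.
import numpy as np


def learn_tokens(grid_feats, weight, bias):
    """grid_feats: [N, 49, C]; weight: [num_tokens, 49]; bias: [num_tokens].
    Returns [N, num_tokens, C], one learned token per output row."""
    return np.einsum("ngc,tg->ntc", grid_feats, weight) + bias[None, :, None]


if __name__ == "__main__":
    n, grid, feat_dim, num_joints = 2, 49, 2048, 24   # feat_dim is a stand-in
    num_tokens = num_joints + 10                      # matches Conv1D(49, num_joints + 10, 1)
    feats = np.random.rand(n, grid, feat_dim).astype(np.float32)
    w = np.random.rand(num_tokens, grid).astype(np.float32)
    b = np.zeros(num_tokens, dtype=np.float32)
    print(learn_tokens(feats, w, b).shape)  # (2, 34, 2048)
```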