From e6324fd544175a3787b28a31c87da5a9ae03a018 Mon Sep 17 00:00:00 2001
From: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Date: Tue, 28 Feb 2023 18:48:32 +0800
Subject: [PATCH] Adapt keypoint configs and transforms for both HigherHRNet and PETR (#7840)

* adapt HigherHRNet/PETR configs and keypoint transforms (list-valued trainsize, dataset return_* flags)

* test ok
---
 .../higherhrnet/higherhrnet_hrnet_w32_512.yml  |  8 +++++++-
 .../higherhrnet_hrnet_w32_512_swahr.yml        |  8 +++++++-
 .../higherhrnet/higherhrnet_hrnet_w32_640.yml  | 10 ++++++++--
 .../keypoint/petr/petr_resnet50_16x2_coco.yml  |  3 +--
 ppdet/data/transform/keypoint_operators.py     | 18 +++++++++---------
 ppdet/utils/visualizer.py                      |  2 +-
 6 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml
index 7cea9d4a2..5dedfb32b 100644
--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml
@@ -66,6 +66,9 @@ TrainDataset:
     anno_path: annotations/person_keypoints_train2017.json
     dataset_dir: dataset/coco
     num_joints: *num_joints
+    return_bbox: False
+    return_area: False
+    return_class: False
 
 EvalDataset:
   !KeypointBottomUpCocoDataset
@@ -74,6 +77,9 @@ EvalDataset:
     dataset_dir: dataset/coco
     num_joints: *num_joints
     test_mode: true
+    return_bbox: False
+    return_area: False
+    return_class: False
 
 TestDataset:
   !ImageFolder
@@ -88,7 +94,7 @@ TrainReader:
         max_degree: 30
         scale: [0.75, 1.5]
         max_shift: 0.2
-        trainsize: *input_size
+        trainsize: [*input_size, *input_size]
         hmsize: [*hm_size, *hm_size_2x]
     - KeyPointFlip:
         flip_prob: 0.5
diff --git a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml
index 2677d20bc..7b0f7560a 100644
--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml
@@ -67,6 +67,9 @@ TrainDataset:
     anno_path: annotations/person_keypoints_train2017.json
     dataset_dir: dataset/coco
     num_joints: *num_joints
+    return_bbox: False
+    return_area: False
+    return_class: False
 
 EvalDataset:
   !KeypointBottomUpCocoDataset
@@ -75,6 +78,9 @@ EvalDataset:
     dataset_dir: dataset/coco
     num_joints: *num_joints
     test_mode: true
+    return_bbox: False
+    return_area: False
+    return_class: False
 
 TestDataset:
   !ImageFolder
@@ -89,7 +95,7 @@ TrainReader:
         max_degree: 30
         scale: [0.75, 1.5]
         max_shift: 0.2
-        trainsize: *input_size
+        trainsize: [*input_size, *input_size]
         hmsize: [*hm_size, *hm_size_2x]
     - KeyPointFlip:
         flip_prob: 0.5
diff --git a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml
index 7cbeb01d1..edd66e55d 100644
--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml
@@ -66,6 +66,9 @@ TrainDataset:
     anno_path: annotations/person_keypoints_train2017.json
     dataset_dir: dataset/coco
     num_joints: *num_joints
+    return_bbox: False
+    return_area: False
+    return_class: False
 
 EvalDataset:
   !KeypointBottomUpCocoDataset
@@ -74,12 +77,15 @@ EvalDataset:
     dataset_dir: dataset/coco
     num_joints: *num_joints
     test_mode: true
+    return_bbox: False
+    return_area: False
+    return_class: False
 
 TestDataset:
   !ImageFolder
     anno_path: dataset/coco/keypoint_imagelist.txt
 
-worker_num: 0
+worker_num: 8
 global_mean: &global_mean [0.485, 0.456, 0.406]
 global_std: &global_std [0.229, 0.224, 0.225]
 TrainReader:
@@ -88,7 +94,7 @@ TrainReader:
         max_degree: 30
         scale: [0.75, 1.5]
         max_shift: 0.2
-        trainsize: *input_size
+        trainsize: [*input_size, *input_size]
         hmsize: [*hm_size, *hm_size_2x]
     - KeyPointFlip:
         flip_prob: 0.5
diff --git a/configs/keypoint/petr/petr_resnet50_16x2_coco.yml b/configs/keypoint/petr/petr_resnet50_16x2_coco.yml
index d6415ad3b..a97eff63a 100644
--- a/configs/keypoint/petr/petr_resnet50_16x2_coco.yml
+++ b/configs/keypoint/petr/petr_resnet50_16x2_coco.yml
@@ -245,8 +245,7 @@ EvalReader:
 TestReader:
   sample_transforms:
     - Decode: {}
-    - EvalAffine:
-        size: *trainsize
+    - EvalAffine: {size: 800}
     - NormalizeImage:
         mean: *global_mean
         std: *global_std
diff --git a/ppdet/data/transform/keypoint_operators.py b/ppdet/data/transform/keypoint_operators.py
index 24cf63b88..fea23d696 100644
--- a/ppdet/data/transform/keypoint_operators.py
+++ b/ppdet/data/transform/keypoint_operators.py
@@ -76,7 +76,7 @@ class KeyPointFlip(object):
         '''
         records['gt_joints'] is Sequence in higherhrnet
         '''
-        if not ('gt_joints' in records and records['gt_joints'].size > 0):
+        if not ('gt_joints' in records and len(records['gt_joints']) > 0):
             return records
 
         kpts_lst = records['gt_joints']
@@ -147,7 +147,7 @@ class RandomAffine(object):
         max_scale (list[2]): the scale range to apply, transform range is [min, max]
         max_shift (float): the max abslute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
         hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
-        trainsize (int): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
+        trainsize (list[2]): the standard size used for training; the side of [h,w] selected by 'scale_type' is resized to trainsize
         scale_type (str): the length of [h,w] to used for trainsize, chosed between 'short' and 'long'
         records(dict): the dict contained the image, mask and coords
 
@@ -161,7 +161,7 @@ class RandomAffine(object):
                  scale=[0.75, 1.5],
                  max_shift=0.2,
                  hmsize=None,
-                 trainsize=512,
+                 trainsize=[512, 512],
                  scale_type='short',
                  boldervalue=[114, 114, 114]):
         super(RandomAffine, self).__init__()
@@ -304,7 +304,7 @@ class RandomAffine(object):
         input_size = 2 * center
         if self.trainsize != -1:
             dsize = self.trainsize
-            imgshape = (dsize, dsize)
+            imgshape = (dsize)
         else:
             dsize = scale
             imgshape = (shape.tolist())
@@ -379,6 +379,7 @@ class EvalAffine(object):
         if 'gt_joints' in records:
             del records['gt_joints']
         records['image'] = image_resized
+        records['scale_factor'] = self.size / min(h, w)
         return records
 
 
@@ -1574,14 +1575,13 @@ class PETR_Resize:
             dict: Resized results, 'im_shape', 'pad_shape', 'scale_factor', \
                 'keep_ratio' keys are added into result dict.
         """
-
         if 'scale' not in results:
             if 'scale_factor' in results:
                 img_shape = results['image'].shape[:2]
-                scale_factor = results['scale_factor']
-                assert isinstance(scale_factor, float)
-                results['scale'] = tuple(
-                    [int(x * scale_factor) for x in img_shape][::-1])
+                scale_factor = results['scale_factor'][0]
+                # assert isinstance(scale_factor, float)
+                results['scale'] = [int(x * scale_factor)
+                                    for x in img_shape][::-1]
             else:
                 self._random_scale(results)
         else:
diff --git a/ppdet/utils/visualizer.py b/ppdet/utils/visualizer.py
index 1c8560a74..f7193306c 100644
--- a/ppdet/utils/visualizer.py
+++ b/ppdet/utils/visualizer.py
@@ -238,7 +238,7 @@ def draw_pose(image,
                      'for example: `pip install matplotlib`.')
         raise e
 
-    skeletons = np.array([item['keypoints'] for item in results]).reshape((-1, 51))
+    skeletons = np.array([item['keypoints'] for item in results])
     kpt_nums = 17
     if len(skeletons) > 0:
         kpt_nums = int(skeletons.shape[1] / 3)
-- 
GitLab