adapted between higherhrnet and petr (#7840)

* new adapted * test ok

adapted between higherhrnet and petr (#7840)
* new adapted * test ok
e6324fd5 · zhiboniu · GitHub · bec57bcf · e6324fd5 · e6324fd5
6 changed files
--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml
@@ -66,6 +66,9 @@ TrainDataset:
    anno_path: annotations/person_keypoints_train2017.json
    dataset_dir: dataset/coco
    num_joints: *num_joints
+    return_bbox: False
+    return_area: False
+    return_class: False
 EvalDataset:
  !KeypointBottomUpCocoDataset
@@ -74,6 +77,9 @@ EvalDataset:
    dataset_dir: dataset/coco
    num_joints: *num_joints
    test_mode: true
+    return_bbox: False
+    return_area: False
+    return_class: False
 TestDataset:
  !ImageFolder
@@ -88,7 +94,7 @@ TrainReader:
        max_degree: 30
        scale: [0.75, 1.5]
        max_shift: 0.2
-        trainsize: *input_size
+        trainsize: [*input_size, *input_size]
        hmsize: [*hm_size, *hm_size_2x]
    - KeyPointFlip:
        flip_prob: 0.5

--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml
@@ -67,6 +67,9 @@ TrainDataset:
    anno_path: annotations/person_keypoints_train2017.json
    dataset_dir: dataset/coco
    num_joints: *num_joints
+    return_bbox: False
+    return_area: False
+    return_class: False
 EvalDataset:
  !KeypointBottomUpCocoDataset
@@ -75,6 +78,9 @@ EvalDataset:
    dataset_dir: dataset/coco
    num_joints: *num_joints
    test_mode: true
+    return_bbox: False
+    return_area: False
+    return_class: False
 TestDataset:
  !ImageFolder
@@ -89,7 +95,7 @@ TrainReader:
        max_degree: 30
        scale: [0.75, 1.5]
        max_shift: 0.2
-        trainsize: *input_size
+        trainsize: [*input_size, *input_size]
        hmsize: [*hm_size, *hm_size_2x]
    - KeyPointFlip:
        flip_prob: 0.5

--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml
@@ -66,6 +66,9 @@ TrainDataset:
    anno_path: annotations/person_keypoints_train2017.json
    dataset_dir: dataset/coco
    num_joints: *num_joints
+    return_bbox: False
+    return_area: False
+    return_class: False
 EvalDataset:
  !KeypointBottomUpCocoDataset
@@ -74,12 +77,15 @@ EvalDataset:
    dataset_dir: dataset/coco
    num_joints: *num_joints
    test_mode: true
+    return_bbox: False
+    return_area: False
+    return_class: False
 TestDataset:
  !ImageFolder
    anno_path: dataset/coco/keypoint_imagelist.txt
-worker_num: 0
+worker_num: 8
 global_mean: &global_mean [0.485, 0.456, 0.406]
 global_std: &global_std [0.229, 0.224, 0.225]
 TrainReader:
@@ -88,7 +94,7 @@ TrainReader:
        max_degree: 30
        scale: [0.75, 1.5]
        max_shift: 0.2
-        trainsize: *input_size
+        trainsize: [*input_size, *input_size]
        hmsize: [*hm_size, *hm_size_2x]
    - KeyPointFlip:
        flip_prob: 0.5

--- a/configs/keypoint/petr/petr_resnet50_16x2_coco.yml
+++ b/configs/keypoint/petr/petr_resnet50_16x2_coco.yml
@@ -245,8 +245,7 @@ EvalReader:
 TestReader:
  sample_transforms:
    - Decode: {}
-    - EvalAffine:
+    - EvalAffine: {size: 800}
-        size: *trainsize
    - NormalizeImage:
        mean: *global_mean
        std: *global_std

--- a/ppdet/data/transform/keypoint_operators.py
+++ b/ppdet/data/transform/keypoint_operators.py
@@ -76,7 +76,7 @@ class KeyPointFlip(object):
        '''
        records['gt_joints'] is Sequence in higherhrnet
        '''
-        if not ('gt_joints' in records and records['gt_joints'].size > 0):
+        if not ('gt_joints' in records and len(records['gt_joints']) > 0):
            return records
        kpts_lst = records['gt_joints']
@@ -147,7 +147,7 @@ class RandomAffine(object):
        max_scale (list[2]): the scale range to apply, transform range is [min, max]
        max_shift (float): the max abslute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
-        trainsize (int): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
+        trainsize (list[2]): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
        scale_type (str): the length of [h,w] to used for trainsize, chosed between 'short' and 'long'
        records(dict): the dict contained the image, mask and coords
@@ -161,7 +161,7 @@ class RandomAffine(object):
                 scale=[0.75, 1.5],
                 max_shift=0.2,
                 hmsize=None,
-                 trainsize=512,
+                 trainsize=[512, 512],
                 scale_type='short',
                 boldervalue=[114, 114, 114]):
        super(RandomAffine, self).__init__()
@@ -304,7 +304,7 @@ class RandomAffine(object):
        input_size = 2 * center
        if self.trainsize != -1:
            dsize = self.trainsize
-            imgshape = (dsize, dsize)
+            imgshape = (dsize)
        else:
            dsize = scale
            imgshape = (shape.tolist())
@@ -379,6 +379,7 @@ class EvalAffine(object):
        if 'gt_joints' in records:
            del records['gt_joints']
        records['image'] = image_resized
+        records['scale_factor'] = self.size / min(h, w)
        return records
@@ -1574,14 +1575,13 @@ class PETR_Resize:
            dict: Resized results, 'im_shape', 'pad_shape', 'scale_factor', \
                'keep_ratio' keys are added into result dict.
        """
        if 'scale' not in results:
            if 'scale_factor' in results:
                img_shape = results['image'].shape[:2]
-                scale_factor = results['scale_factor']
+                scale_factor = results['scale_factor'][0]
-                assert isinstance(scale_factor, float)
+                # assert isinstance(scale_factor, float)
-                results['scale'] = tuple(
+                results['scale'] = [int(x * scale_factor)
-                    [int(x * scale_factor) for x in img_shape][::-1])
+                                    for x in img_shape][::-1]
            else:
                self._random_scale(results)
        else:

--- a/ppdet/utils/visualizer.py
+++ b/ppdet/utils/visualizer.py
@@ -238,7 +238,7 @@ def draw_pose(image,
                     'for example: `pip install matplotlib`.')
        raise e
-    skeletons = np.array([item['keypoints'] for item in results]).reshape((-1, 51))
+    skeletons = np.array([item['keypoints'] for item in results])
    kpt_nums = 17
    if len(skeletons) > 0:
        kpt_nums = int(skeletons.shape[1] / 3)