Update hadnpose preprocess (#141)

* update preprocess of handpose estimation to handle some conner cases * back PALM_BOX_ENLARGE_FACTOR to default

Update hadnpose preprocess (#141)
* update preprocess of handpose estimation to handle some conner cases * back PALM_BOX_ENLARGE_FACTOR to default
e06fd7dc · Wanli · GitHub · 3664c925 · e06fd7dc
隐藏空白更改
内联并排

Showing with 64 addition and 36 deletion

models/handpose_estimation_mediapipe/mp_handpose.py models/handpose_estimation_mediapipe/mp_handpose.py +64 -36

未找到文件。
--- a/models/handpose_estimation_mediapipe/mp_handpose.py
+++ b/models/handpose_estimation_mediapipe/mp_handpose.py
@@ -13,6 +13,8 @@ class MPHandPose:
        self.PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2]
        self.PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0
        self.PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2
+        self.PALM_BOX_PRE_SHIFT_VECTOR = [0, 0]
+        self.PALM_BOX_PRE_ENLARGE_FACTOR = 4
        self.PALM_BOX_SHIFT_VECTOR = [0, -0.4]
        self.PALM_BOX_ENLARGE_FACTOR = 3
        self.HAND_BOX_SHIFT_VECTOR = [0, -0.1]
@@ -34,6 +36,48 @@ class MPHandPose:
        self.target_id = targetId
        self.model.setPreferableTarget(self.target_id)
+    def _cropAndPadFromPalm(self, image, palm_bbox, for_rotation = False):
+        # shift bounding box
+        wh_palm_bbox = palm_bbox[1] - palm_bbox[0]
+        if for_rotation:
+            shift_vector = self.PALM_BOX_PRE_SHIFT_VECTOR
+        else:
+            shift_vector = self.PALM_BOX_SHIFT_VECTOR
+        shift_vector = shift_vector * wh_palm_bbox
+        palm_bbox = palm_bbox + shift_vector
+        # enlarge bounding box
+        center_palm_bbox = np.sum(palm_bbox, axis=0) / 2
+        wh_palm_bbox = palm_bbox[1] - palm_bbox[0]
+        if for_rotation:
+            enlarge_scale = self.PALM_BOX_PRE_ENLARGE_FACTOR
+        else:
+            enlarge_scale = self.PALM_BOX_ENLARGE_FACTOR
+        new_half_size = wh_palm_bbox * enlarge_scale / 2
+        palm_bbox = np.array([
+            center_palm_bbox - new_half_size,
+            center_palm_bbox + new_half_size])
+        palm_bbox = palm_bbox.astype(np.int32)
+        palm_bbox[:][0] = np.clip(palm_bbox[:][0], 0, image.shape[0])
+        palm_bbox[:][1] = np.clip(palm_bbox[:][1], 0, image.shape[1])
+        # crop to the size of interest
+        image = image[palm_bbox[0][1]:palm_bbox[1][1], palm_bbox[0][0]:palm_bbox[1][0], :]
+        # pad to ensure conner pixels won't be cropped
+        if for_rotation:
+            side_len = np.linalg.norm(image.shape[:2])
+        else:
+            side_len = max(image.shape[:2])
+        side_len = int(side_len)
+        pad_h = side_len - image.shape[0]
+        pad_w = side_len - image.shape[1]
+        left = pad_w // 2
+        top = pad_h // 2
+        right = pad_w - left
+        bottom = pad_h - top
+        image = cv.copyMakeBorder(image, top, bottom, left, right, cv.BORDER_CONSTANT, None, (0, 0, 0))
+        bias = palm_bbox[0] - [left, top]
+        return image, palm_bbox, bias
    def _preprocess(self, image, palm):
        '''
        Rotate input for inference.
@@ -43,14 +87,22 @@ class MPHandPose:
          palm_landmarks - 7 landmarks (5 finger base points, 2 palm base points) of shape [7, 2]
        Returns:
          rotated_hand - rotated hand image for inference
+          rotate_palm_bbox - palm box of interest range
+          angle - rotate angle for hand
          rotation_matrix - matrix for rotation and de-rotation
+          pad_bias - pad pixels of interest range
        '''
-        # Rotate input to have vertically oriented hand image
+        # crop and pad image to interest range
-        #  compute rotation
+        pad_bias = np.array([0, 0], dtype=np.int32)  # left, top
        palm_bbox = palm[0:4].reshape(2, 2)
-        palm_landmarks = palm[4:18].reshape(7, 2)
+        image, palm_bbox, bias = self._cropAndPadFromPalm(image, palm_bbox, True)
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
+        pad_bias += bias
+        # Rotate input to have vertically oriented hand image
+        # compute rotation
+        palm_bbox -= pad_bias
+        palm_landmarks = palm[4:18].reshape(7, 2) - pad_bias
        p1 = palm_landmarks[self.PALM_LANDMARKS_INDEX_OF_PALM_BASE]
        p2 = palm_landmarks[self.PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]
        radians = np.pi / 2 - np.arctan2(-(p2[1] - p1[1]), p2[0] - p1[0])
@@ -72,49 +124,25 @@ class MPHandPose:
            np.amin(rotated_palm_landmarks, axis=1),
            np.amax(rotated_palm_landmarks, axis=1)])  # [top-left, bottom-right]
-        #  shift bounding box
+        crop, rotated_palm_bbox, _ = self._cropAndPadFromPalm(rotated_image, rotated_palm_bbox)
-        wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
+        blob = cv.resize(crop, dsize=self.input_size, interpolation=cv.INTER_AREA).astype(np.float32)
-        shift_vector = self.PALM_BOX_SHIFT_VECTOR * wh_rotated_palm_bbox
+        blob = blob / 255.
-        rotated_palm_bbox = rotated_palm_bbox + shift_vector
-        #  squarify bounding boxx
-        center_rotated_plam_bbox = np.sum(rotated_palm_bbox, axis=0) / 2
-        wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
-        new_half_size = np.amax(wh_rotated_palm_bbox) / 2
-        rotated_palm_bbox = np.array([
-            center_rotated_plam_bbox - new_half_size,
-            center_rotated_plam_bbox + new_half_size])
-        #  enlarge bounding box
-        center_rotated_plam_bbox = np.sum(rotated_palm_bbox, axis=0) / 2
-        wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
-        new_half_size = wh_rotated_palm_bbox * self.PALM_BOX_ENLARGE_FACTOR / 2
-        rotated_palm_bbox = np.array([
-            center_rotated_plam_bbox - new_half_size,
-            center_rotated_plam_bbox + new_half_size])
-        # Crop and resize the rotated image by the bounding box
-        [[x1, y1], [x2, y2]] = rotated_palm_bbox.astype(np.int32)
-        diff = np.maximum([-x1, -y1, x2 - rotated_image.shape[1], y2 - rotated_image.shape[0]], 0)
-        [x1, y1, x2, y2] = [x1, y1, x2, y2] + diff
-        crop = rotated_image[y1:y2, x1:x2, :]
-        crop = cv.copyMakeBorder(crop, diff[1], diff[3], diff[0], diff[2], cv.BORDER_CONSTANT, value=(0, 0, 0))
-        blob = cv.resize(crop, dsize=self.input_size, interpolation=cv.INTER_AREA).astype(np.float32) / 255.0
-        return blob[np.newaxis, :, :, :], rotated_palm_bbox, angle, rotation_matrix
+        return blob[np.newaxis, :, :, :], rotated_palm_bbox, angle, rotation_matrix, pad_bias
    def infer(self, image, palm):
        # Preprocess
-        input_blob, rotated_palm_bbox, angle, rotation_matrix = self._preprocess(image, palm)
+        input_blob, rotated_palm_bbox, angle, rotation_matrix, pad_bias = self._preprocess(image, palm)
        # Forward
        self.model.setInput(input_blob)
        output_blob = self.model.forward(self.model.getUnconnectedOutLayersNames())
        # Postprocess
-        results = self._postprocess(output_blob, rotated_palm_bbox, angle, rotation_matrix)
+        results = self._postprocess(output_blob, rotated_palm_bbox, angle, rotation_matrix, pad_bias)
        return results # [bbox_coords, landmarks_coords, conf]
-    def _postprocess(self, blob, rotated_palm_bbox, angle, rotation_matrix):
+    def _postprocess(self, blob, rotated_palm_bbox, angle, rotation_matrix, pad_bias):
        landmarks, conf, handedness, landmarks_word = blob
        conf = conf[0][0]
@@ -127,7 +155,7 @@ class MPHandPose:
        # transform coords back to the input coords
        wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
        scale_factor = wh_rotated_palm_bbox / self.input_size
-        landmarks[:, :2] = (landmarks[:, :2] - self.input_size / 2) * scale_factor
+        landmarks[:, :2] = (landmarks[:, :2] - self.input_size / 2) * max(scale_factor)
        landmarks[:, 2] = landmarks[:, 2] * max(scale_factor) # depth scaling
        coords_rotation_matrix = cv.getRotationMatrix2D((0, 0), angle, 1.0)
        rotated_landmarks = np.dot(landmarks[:, :2], coords_rotation_matrix[:, :2])
@@ -149,7 +177,7 @@ class MPHandPose:
        original_center = np.array([
            np.dot(center, inverse_rotation_matrix[0]),
            np.dot(center, inverse_rotation_matrix[1])])
-        landmarks[:, :2] = rotated_landmarks[:, :2] + original_center
+        landmarks[:, :2] = rotated_landmarks[:, :2] + original_center + pad_bias
        # get bounding box from rotated_landmarks
        bbox = np.array([