adapt keypoint detection for deploy (#1899)

ef65c7e1 · Guanghua Yu · GitHub · 8a083263 · ef65c7e1 · ef65c7e1
6 changed files
--- a/configs/face_detection/blazeface_keypoint.yml
+++ b/configs/face_detection/blazeface_keypoint.yml
@@ -9,6 +9,7 @@ save_dir: output
 weights: output/blazeface_keypoint/model_final.pdparams
 # 1(label_class) + 1(background)
 num_classes: 2
+with_lmk: true
 BlazeFace:
  backbone: BlazeNet
@@ -19,7 +20,6 @@ BlazeFace:
    score_threshold: 0.01
  min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
  use_density_prior_box: false
-  with_lmk: true
  lmk_loss:
    overlap_threshold: 0.35
    neg_overlap: 0.35
@@ -103,12 +103,11 @@ EvalReader:
  - !DecodeImage
    to_rgb: true
  - !NormalizeBox {}
+  - !Permute {}
  - !NormalizeImage
-    is_channel_first: false
    is_scale: false
-    mean: [123, 117, 104]
+    mean: [104, 117, 123]
    std: [127.502231, 127.502231, 127.502231]
-  - !Permute {}
  batch_size: 1
 TestReader:
@@ -120,10 +119,12 @@ TestReader:
  sample_transforms:
  - !DecodeImage
    to_rgb: true
+  - !ResizeImage
+    target_size: 640
+    interp: 1
+  - !Permute {}
  - !NormalizeImage
-    is_channel_first: false
    is_scale: false
-    mean: [123, 117, 104]
+    mean: [104, 117, 123]
    std: [127.502231, 127.502231, 127.502231]
-  - !Permute {}
  batch_size: 1
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -34,8 +34,7 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from preprocess import preprocess, Resize, Normalize, Permute, PadStride
-from visualize import visualize_box_mask
+from visualize import visualize_box_mask, lmk2out
-from ppdet.utils.check import enable_static_mode
 # Global dictionary
 SUPPORT_MODELS = {
@@ -90,9 +89,12 @@ class Detector(object):
        inputs = create_inputs(im, im_info, self.config.arch)
        return inputs, im_info
-    def postprocess(self, np_boxes, np_masks, im_info, threshold=0.5):
+    def postprocess(self, np_boxes, np_masks, np_lmk, im_info, threshold=0.5):
        # postprocess output of predictor
        results = {}
+        if np_lmk is not None:
+            results['landmark'] = lmk2out(np_boxes, np_lmk, im_info, threshold)
        if self.config.arch in ['SSD', 'Face']:
            w, h = im_info['origin_shape']
            np_boxes[:, 2] *= h
@@ -129,7 +131,7 @@ class Detector(object):
                            shape:[N, class_num, mask_resolution, mask_resolution]
        '''
        inputs, im_info = self.preprocess(image)
-        np_boxes, np_masks = None, None
+        np_boxes, np_masks, np_lmk = None, None, None
        if self.config.use_python_inference:
            for i in range(warmup):
                outs = self.executor.run(self.program,
@@ -164,6 +166,17 @@ class Detector(object):
                        output_names[1])
                    np_masks = masks_tensor.copy_to_cpu()
+                if self.config.with_lmk is not None and self.config.with_lmk == True:
+                    face_index = self.predictor.get_output_tensor(output_names[
+                        1])
+                    landmark = self.predictor.get_output_tensor(output_names[2])
+                    prior_boxes = self.predictor.get_output_tensor(output_names[
+                        3])
+                    np_face_index = face_index.copy_to_cpu()
+                    np_prior_boxes = prior_boxes.copy_to_cpu()
+                    np_landmark = landmark.copy_to_cpu()
+                    np_lmk = [np_face_index, np_landmark, np_prior_boxes]
            t1 = time.time()
            for i in range(repeats):
                self.predictor.zero_copy_run()
@@ -174,6 +187,17 @@ class Detector(object):
                    masks_tensor = self.predictor.get_output_tensor(
                        output_names[1])
                    np_masks = masks_tensor.copy_to_cpu()
+                if self.config.with_lmk is not None and self.config.with_lmk == True:
+                    face_index = self.predictor.get_output_tensor(output_names[
+                        1])
+                    landmark = self.predictor.get_output_tensor(output_names[2])
+                    prior_boxes = self.predictor.get_output_tensor(output_names[
+                        3])
+                    np_face_index = face_index.copy_to_cpu()
+                    np_prior_boxes = prior_boxes.copy_to_cpu()
+                    np_landmark = landmark.copy_to_cpu()
+                    np_lmk = [np_face_index, np_landmark, np_prior_boxes]
            t2 = time.time()
            ms = (t2 - t1) * 1000.0 / repeats
            print("Inference: {} ms per batch image".format(ms))
@@ -186,7 +210,7 @@ class Detector(object):
                results = {'boxes': np.array([])}
            else:
                results = self.postprocess(
-                    np_boxes, np_masks, im_info, threshold=threshold)
+                    np_boxes, np_masks, np_lmk, im_info, threshold=threshold)
        return results
@@ -325,6 +349,9 @@ class Config():
        self.mask_resolution = None
        if 'mask_resolution' in yml_conf:
            self.mask_resolution = yml_conf['mask_resolution']
+        self.with_lmk = None
+        if 'with_lmk' in yml_conf:
+            self.with_lmk = yml_conf['with_lmk']
        self.print_config()
    def check_model(self, yml_conf):
@@ -522,7 +549,10 @@ def main():
 if __name__ == '__main__':
-    enable_static_mode()
+    try:
+        paddle.enable_static()
+    except:
+        pass
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--model_dir",

--- a/deploy/python/visualize.py
+++ b/deploy/python/visualize.py
@@ -56,6 +56,8 @@ def visualize_box_mask(im, results, labels, mask_resolution=14, threshold=0.5):
            results['score'],
            labels,
            threshold=threshold)
+    if 'landmark' in results:
+        im = draw_lmk(im, results['landmark'])
    return im
@@ -247,3 +249,50 @@ def draw_segm(im,
            1,
            lineType=cv2.LINE_AA)
    return Image.fromarray(im.astype('uint8'))
+def lmk2out(bboxes, np_lmk, im_info, threshold=0.5, is_bbox_normalized=True):
+    """Decode landmark predictions for detections scoring >= `threshold`.
+
+    Args:
+        bboxes: detection array; column 1 of each row is read as the score.
+            `None` or a (1, 1)-shaped array is treated as "no detections".
+        np_lmk: sequence of three arrays `[face_index, landmark, prior_box]`.
+            `face_index[i][0]` selects the prior / landmark row used for
+            detection `i`; `landmark` is reshaped to (-1, 10) and
+            `prior_box` to (-1, 4).
+        im_info: dict; only `im_info['origin_shape']` is read here.
+        threshold: detections whose score is below this value are skipped.
+        is_bbox_normalized: when True, the decoded values are multiplied by
+            the two entries of `origin_shape` (alternating per coordinate).
+
+    Returns:
+        np.ndarray with one row of 10 decoded values per kept detection, or
+        an empty array when there is nothing to decode.
+    """
+    # Test for None before touching `.shape`: the reverse order raised
+    # AttributeError on a None input instead of returning an empty result.
+    if bboxes is None or bboxes.shape == (1, 1):
+        return np.array([])
+    # NOTE(review): variable names follow the original code; confirm the
+    # actual (w, h) vs (h, w) order of `origin_shape` against the caller.
+    image_w, image_h = im_info['origin_shape']
+    face_index, landmark, prior_box = np_lmk[:]
+    prior = np.reshape(prior_box, (-1, 4))
+    predict_lmk = np.reshape(landmark, (-1, 10))
+    # Per-coordinate multipliers, built once instead of once per detection.
+    denorm = np.array([image_h, image_w] * 5)
+    decoded = []
+    for i in range(bboxes.shape[0]):
+        if bboxes[i][1] < threshold:
+            continue
+        theindex = face_index[i][0]
+        me_prior = prior[theindex, :]
+        lmk_pred = predict_lmk[theindex, :]
+        prior_h = me_prior[2] - me_prior[0]
+        prior_w = me_prior[3] - me_prior[1]
+        prior_h_center = (me_prior[2] + me_prior[0]) / 2
+        prior_w_center = (me_prior[3] + me_prior[1]) / 2
+        lmk_decode = np.zeros((10))
+        # Even and odd slots use different extent/center pairs; the 0.1
+        # factor is kept exactly as in the original decoding formula.
+        for j in range(0, 10, 2):
+            lmk_decode[j] = lmk_pred[j] * 0.1 * prior_w + prior_h_center
+            lmk_decode[j + 1] = (
+                lmk_pred[j + 1] * 0.1 * prior_h + prior_w_center)
+        if is_bbox_normalized:
+            lmk_decode = lmk_decode * denorm
+        decoded.append(lmk_decode)
+    return np.asarray(decoded)
+def draw_lmk(image, lmk_results):
+    """Draw five green landmark dots per entry of `lmk_results` on `image`.
+
+    Each entry is read as ten values taken in consecutive pairs; every pair
+    is rounded to integers and used as the anchor of a small filled ellipse.
+    The image is drawn on in place and returned.
+    """
+    canvas = ImageDraw.Draw(image)
+    for points in lmk_results:
+        for idx in range(0, 10, 2):
+            cx = int(round(points[idx]))
+            cy = int(round(points[idx + 1]))
+            # Bounding box of the dot: 2 px before and 3 px past the anchor.
+            dot_box = (cx - 2, cy - 2, cx + 3, cy + 3)
+            canvas.ellipse(dot_box, fill='green', outline='green')
+    return image
--- a/ppdet/data/source/widerface.py
+++ b/ppdet/data/source/widerface.py
@@ -109,18 +109,24 @@ class WIDERFaceDataSet(DataSet):
        file_dict = {}
        num_class = 0
+        exts = ['jpg', 'jpeg', 'png', 'bmp']
+        exts += [ext.upper() for ext in exts]
        for i in range(len(lines_input_txt)):
            line_txt = lines_input_txt[i].strip('\n\t\r')
-            if '.jpg' in line_txt:
+            split_str = line_txt.split(' ')
-                if i != 0:
+            if len(split_str) == 1:
-                    num_class += 1
+                img_file_name = os.path.split(split_str[0])[1]
-                file_dict[num_class] = []
+                split_txt = img_file_name.split('.')
-                file_dict[num_class].append(line_txt)
+                if len(split_txt) < 2:
-            if '.jpg' not in line_txt:
+                    continue
+                elif split_txt[-1] in exts:
+                    if i != 0:
+                        num_class += 1
+                    file_dict[num_class] = [line_txt]
+            else:
                if len(line_txt) <= 6:
                    continue
                result_boxs = []
-                split_str = line_txt.split(' ')
                xmin = float(split_str[0])
                ymin = float(split_str[1])
                w = float(split_str[2])

--- a/ppdet/modeling/architectures/blazeface.py
+++ b/ppdet/modeling/architectures/blazeface.py
@@ -51,7 +51,7 @@ class BlazeFace(object):
    __category__ = 'architecture'
    __inject__ = ['backbone', 'output_decoder']
-    __shared__ = ['num_classes']
+    __shared__ = ['num_classes', 'with_lmk']
    def __init__(self,
                 backbone="BlazeNet",

--- a/ppdet/utils/export_utils.py
+++ b/ppdet/utils/export_utils.py
@@ -141,6 +141,10 @@ def dump_infer_config(FLAGS, config):
                infer_arch))
        os._exit(0)
+    # support landmark output
+    if 'with_lmk' in config and config['with_lmk'] == True:
+        infer_cfg['with_lmk'] = True
    if 'Mask' in config['architecture']:
        infer_cfg['mask_resolution'] = config['MaskHead']['resolution']
    infer_cfg['with_background'], infer_cfg['Preprocess'], infer_cfg[