From ef65c7e165cd893f04fded82a9200c8e1deb3b5a Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Tue, 15 Dec 2020 20:33:33 +0800
Subject: [PATCH] adapt keypoint detection for deploy (#1899)

---
 configs/face_detection/blazeface_keypoint.yml | 15 +++---
 deploy/python/infer.py                        | 42 +++++++++++++---
 deploy/python/visualize.py                    | 49 +++++++++++++++++++
 ppdet/data/source/widerface.py                | 20 +++++---
 ppdet/modeling/architectures/blazeface.py     |  2 +-
 ppdet/utils/export_utils.py                   |  4 ++
 6 files changed, 111 insertions(+), 21 deletions(-)

diff --git a/configs/face_detection/blazeface_keypoint.yml b/configs/face_detection/blazeface_keypoint.yml
index 299ff0582..b20cc5bbb 100644
--- a/configs/face_detection/blazeface_keypoint.yml
+++ b/configs/face_detection/blazeface_keypoint.yml
@@ -9,6 +9,7 @@ save_dir: output
 weights: output/blazeface_keypoint/model_final.pdparams
 # 1(label_class) + 1(background)
 num_classes: 2
+with_lmk: true
 
 BlazeFace:
   backbone: BlazeNet
@@ -19,7 +20,6 @@ BlazeFace:
     score_threshold: 0.01
   min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
   use_density_prior_box: false
-  with_lmk: true
   lmk_loss:
     overlap_threshold: 0.35
     neg_overlap: 0.35
@@ -103,12 +103,11 @@ EvalReader:
   - !DecodeImage
     to_rgb: true
   - !NormalizeBox {}
+  - !Permute {}
   - !NormalizeImage
-    is_channel_first: false
     is_scale: false
-    mean: [123, 117, 104]
+    mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
-  - !Permute {}
   batch_size: 1
 
 TestReader:
@@ -120,10 +119,12 @@ TestReader:
   sample_transforms:
   - !DecodeImage
     to_rgb: true
+  - !ResizeImage
+    target_size: 640
+    interp: 1
+  - !Permute {}
   - !NormalizeImage
-    is_channel_first: false
     is_scale: false
-    mean: [123, 117, 104]
+    mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
-  - !Permute {}
   batch_size: 1
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index a6ab42c4a..ae0ff80e9 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -34,8 +34,7 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from preprocess import preprocess, Resize, Normalize, Permute, PadStride
-from visualize import visualize_box_mask
-from ppdet.utils.check import enable_static_mode
+from visualize import visualize_box_mask, lmk2out
 
 # Global dictionary
 SUPPORT_MODELS = {
@@ -90,9 +89,12 @@ class Detector(object):
         inputs = create_inputs(im, im_info, self.config.arch)
         return inputs, im_info
 
-    def postprocess(self, np_boxes, np_masks, im_info, threshold=0.5):
+    def postprocess(self, np_boxes, np_masks, np_lmk, im_info, threshold=0.5):
         # postprocess output of predictor
         results = {}
+        if np_lmk is not None:
+            results['landmark'] = lmk2out(np_boxes, np_lmk, im_info, threshold)
+
         if self.config.arch in ['SSD', 'Face']:
             w, h = im_info['origin_shape']
             np_boxes[:, 2] *= h
@@ -129,7 +131,7 @@ class Detector(object):
                             shape:[N, class_num, mask_resolution, mask_resolution]
         '''
         inputs, im_info = self.preprocess(image)
-        np_boxes, np_masks = None, None
+        np_boxes, np_masks, np_lmk = None, None, None
         if self.config.use_python_inference:
             for i in range(warmup):
                 outs = self.executor.run(self.program,
@@ -164,6 +166,17 @@ class Detector(object):
                         output_names[1])
                     np_masks = masks_tensor.copy_to_cpu()
 
+                if self.config.with_lmk is not None and self.config.with_lmk == True:
+                    face_index = self.predictor.get_output_tensor(output_names[
+                        1])
+                    landmark = self.predictor.get_output_tensor(output_names[2])
+                    prior_boxes = self.predictor.get_output_tensor(output_names[
+                        3])
+                    np_face_index = face_index.copy_to_cpu()
+                    np_prior_boxes = prior_boxes.copy_to_cpu()
+                    np_landmark = landmark.copy_to_cpu()
+                    np_lmk = [np_face_index, np_landmark, np_prior_boxes]
+
             t1 = time.time()
             for i in range(repeats):
                 self.predictor.zero_copy_run()
@@ -174,6 +187,17 @@ class Detector(object):
                     masks_tensor = self.predictor.get_output_tensor(
                         output_names[1])
                     np_masks = masks_tensor.copy_to_cpu()
+
+                if self.config.with_lmk is not None and self.config.with_lmk == True:
+                    face_index = self.predictor.get_output_tensor(output_names[
+                        1])
+                    landmark = self.predictor.get_output_tensor(output_names[2])
+                    prior_boxes = self.predictor.get_output_tensor(output_names[
+                        3])
+                    np_face_index = face_index.copy_to_cpu()
+                    np_prior_boxes = prior_boxes.copy_to_cpu()
+                    np_landmark = landmark.copy_to_cpu()
+                    np_lmk = [np_face_index, np_landmark, np_prior_boxes]
             t2 = time.time()
             ms = (t2 - t1) * 1000.0 / repeats
             print("Inference: {} ms per batch image".format(ms))
@@ -186,7 +210,7 @@ class Detector(object):
                 results = {'boxes': np.array([])}
             else:
                 results = self.postprocess(
-                    np_boxes, np_masks, im_info, threshold=threshold)
+                    np_boxes, np_masks, np_lmk, im_info, threshold=threshold)
 
         return results
 
@@ -325,6 +349,9 @@ class Config():
         self.mask_resolution = None
         if 'mask_resolution' in yml_conf:
             self.mask_resolution = yml_conf['mask_resolution']
+        self.with_lmk = None
+        if 'with_lmk' in yml_conf:
+            self.with_lmk = yml_conf['with_lmk']
         self.print_config()
 
     def check_model(self, yml_conf):
@@ -522,7 +549,10 @@ def main():
 
 
 if __name__ == '__main__':
-    enable_static_mode()
+    try:
+        paddle.enable_static()
+    except AttributeError:  # tolerate Paddle builds that lack enable_static()
+        pass
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument(
         "--model_dir",
diff --git a/deploy/python/visualize.py b/deploy/python/visualize.py
index 1c136be4d..439bf686c 100644
--- a/deploy/python/visualize.py
+++ b/deploy/python/visualize.py
@@ -56,6 +56,8 @@ def visualize_box_mask(im, results, labels, mask_resolution=14, threshold=0.5):
             results['score'],
             labels,
             threshold=threshold)
+    if 'landmark' in results:
+        im = draw_lmk(im, results['landmark'])
     return im
 
 
@@ -247,3 +249,50 @@ def draw_segm(im,
             1,
             lineType=cv2.LINE_AA)
     return Image.fromarray(im.astype('uint8'))
+
+
+def lmk2out(bboxes, np_lmk, im_info, threshold=0.5, is_bbox_normalized=True):
+    """Decode face landmarks for every detection scoring at least `threshold`.
+
+    `np_lmk` is `[face_index, landmark, prior_box]`; `landmark` is viewed as rows
+    of 10 offsets and `prior_box` as rows of 4 values. Returns one row of 10
+    decoded values per kept detection; empty if `bboxes` is None or (1, 1).
+    """
+    # Check for None first: reading `.shape` on None raises AttributeError.
+    if bboxes is None or bboxes.shape == (1, 1):
+        return np.array([])
+    image_w, image_h = im_info['origin_shape']
+    face_index, landmark, prior_box = np_lmk[:]
+    prior = np.reshape(prior_box, (-1, 4))
+    predict_lmk = np.reshape(landmark, (-1, 10))
+    im_scale = np.array([image_h, image_w] * 5)  # multipliers for normalized values
+    xywh_res = []
+    for i in range(bboxes.shape[0]):
+        if bboxes[i][1] < threshold:  # column 1 of each detection row is its score
+            continue
+        theindex = face_index[i][0]
+        me_prior = prior[theindex, :]
+        lmk_pred = predict_lmk[theindex, :]
+        prior_h = me_prior[2] - me_prior[0]
+        prior_w = me_prior[3] - me_prior[1]
+        prior_h_center = (me_prior[2] + me_prior[0]) / 2
+        prior_w_center = (me_prior[3] + me_prior[1]) / 2
+        lmk_decode = np.zeros((10))
+        for j in range(0, 10, 2):
+            lmk_decode[j] = lmk_pred[j] * 0.1 * prior_w + prior_h_center
+            lmk_decode[j + 1] = lmk_pred[j + 1] * 0.1 * prior_h + prior_w_center
+        if is_bbox_normalized:
+            lmk_decode = lmk_decode * im_scale
+        xywh_res.append(lmk_decode)
+    return np.asarray(xywh_res)
+
+
+def draw_lmk(image, lmk_results):
+    """Draw every decoded landmark point onto `image` and return the same image."""
+    pen = ImageDraw.Draw(image)
+    for points in lmk_results:
+        for k in range(0, 10, 2):  # five points per row, stored as adjacent pairs
+            cx, cy = int(round(points[k])), int(round(points[k + 1]))
+            # Bounding box of the dot: 2 px before and 3 px after the centre.
+            pen.ellipse((cx - 2, cy - 2, cx + 3, cy + 3), fill='green', outline='green')
+    return image
diff --git a/ppdet/data/source/widerface.py b/ppdet/data/source/widerface.py
index 7aab15337..75da05234 100644
--- a/ppdet/data/source/widerface.py
+++ b/ppdet/data/source/widerface.py
@@ -109,18 +109,24 @@ class WIDERFaceDataSet(DataSet):
 
         file_dict = {}
         num_class = 0
+        exts = ['jpg', 'jpeg', 'png', 'bmp']
+        exts += [ext.upper() for ext in exts]
         for i in range(len(lines_input_txt)):
             line_txt = lines_input_txt[i].strip('\n\t\r')
-            if '.jpg' in line_txt:
-                if i != 0:
-                    num_class += 1
-                file_dict[num_class] = []
-                file_dict[num_class].append(line_txt)
-            if '.jpg' not in line_txt:
+            split_str = line_txt.split(' ')
+            if len(split_str) == 1:
+                img_file_name = os.path.split(split_str[0])[1]
+                split_txt = img_file_name.split('.')
+                if len(split_txt) < 2:
+                    continue
+                elif split_txt[-1] in exts:
+                    if i != 0:
+                        num_class += 1
+                    file_dict[num_class] = [line_txt]
+            else:
                 if len(line_txt) <= 6:
                     continue
                 result_boxs = []
-                split_str = line_txt.split(' ')
                 xmin = float(split_str[0])
                 ymin = float(split_str[1])
                 w = float(split_str[2])
diff --git a/ppdet/modeling/architectures/blazeface.py b/ppdet/modeling/architectures/blazeface.py
index d1ed2095a..7508a6b08 100644
--- a/ppdet/modeling/architectures/blazeface.py
+++ b/ppdet/modeling/architectures/blazeface.py
@@ -51,7 +51,7 @@ class BlazeFace(object):
 
     __category__ = 'architecture'
     __inject__ = ['backbone', 'output_decoder']
-    __shared__ = ['num_classes']
+    __shared__ = ['num_classes', 'with_lmk']
 
     def __init__(self,
                  backbone="BlazeNet",
diff --git a/ppdet/utils/export_utils.py b/ppdet/utils/export_utils.py
index d98861d4a..1904e7cfd 100644
--- a/ppdet/utils/export_utils.py
+++ b/ppdet/utils/export_utils.py
@@ -141,6 +141,10 @@ def dump_infer_config(FLAGS, config):
                 infer_arch))
         os._exit(0)
 
+    # support landmark output
+    if 'with_lmk' in config and config['with_lmk'] == True:
+        infer_cfg['with_lmk'] = True
+
     if 'Mask' in config['architecture']:
         infer_cfg['mask_resolution'] = config['MaskHead']['resolution']
     infer_cfg['with_background'], infer_cfg['Preprocess'], infer_cfg[
-- 
GitLab