diff --git a/configs/face_detection/blazeface_keypoint.yml b/configs/face_detection/blazeface_keypoint.yml new file mode 100644 index 0000000000000000000000000000000000000000..07481f409833a69e5f9ec02d55027df1c6eb5e0b --- /dev/null +++ b/configs/face_detection/blazeface_keypoint.yml @@ -0,0 +1,128 @@ +architecture: BlazeFace +max_iters: 160000 +pretrain_weights: +use_gpu: true +snapshot_iter: 10000 +log_smooth_window: 20 +log_iter: 20 +metric: WIDERFACE +save_dir: output +weights: output/blazeface_keypoint/model_final.pdparams +# 1(label_class) + 1(background) +num_classes: 2 + +BlazeFace: + backbone: BlazeNet + output_decoder: + keep_top_k: 750 + nms_threshold: 0.3 + nms_top_k: 5000 + score_threshold: 0.01 + min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]] + use_density_prior_box: false + with_lmk: true + lmk_loss: + overlap_threshold: 0.35 + neg_overlap: 0.35 + +BlazeNet: + with_extra_blocks: true + lite_edition: false + +LearningRate: + base_lr: 0.002 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 150000] + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +TrainReader: + inputs_def: + image_shape: [3, 640, 640] + fields: ['image', 'gt_bbox', 'gt_class', 'gt_keypoint', 'keypoint_ignore'] + dataset: + !WIDERFaceDataSet + dataset_dir: dataset/wider_face + anno_path: wider_face_split/wider_face_train_bbx_lmk_gt.txt + image_dir: WIDER_train/images + with_lmk: true + sample_transforms: + - !DecodeImage + to_rgb: true + - !NormalizeBox {} + - !RandomDistort + brightness_lower: 0.875 + brightness_upper: 1.125 + is_order: true + - !ExpandImage + max_ratio: 4 + prob: 0.5 + - !CropImageWithDataAchorSampling + anchor_sampler: + - [1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0] + batch_sampler: + - [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] + - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] + - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] + - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] + - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] + target_size: 640 + - !ResizeImage + target_size: 640 + interp: 1 + - !RandomInterpImage + target_size: 640 + - !RandomFlipImage + is_normalized: true + - !Permute {} + - !NormalizeImage + is_scale: false + mean: [104, 117, 123] + std: [127.502231, 127.502231, 127.502231] + batch_size: 16 + use_process: true + worker_num: 8 + shuffle: true + +EvalReader: + inputs_def: + fields: ['image', 'im_id'] + dataset: + !WIDERFaceDataSet + dataset_dir: dataset/wider_face + anno_path: wider_face_split/wider_face_val_bbx_gt.txt + image_dir: WIDER_val/images + sample_transforms: + - !DecodeImage + to_rgb: true + - !NormalizeBox {} + - !Permute {} + - !NormalizeImage + is_scale: false + mean: [104, 117, 123] + std: [127.502231, 127.502231, 127.502231] + batch_size: 1 + +TestReader: + inputs_def: + fields: ['image', 'im_id', 'im_shape'] + dataset: + !ImageFolder + use_default_label: true + sample_transforms: + - !DecodeImage + to_rgb: true + - !Permute {} + - !NormalizeImage + is_scale: false + mean: [104, 117, 123] + std: [127.502231, 127.502231, 127.502231] + batch_size: 1 diff --git a/dataset/wider_face/download.sh b/dataset/wider_face/download.sh index 6c86a22c6826d88846a16fbd43f8b556d8610b8f..59a2054def3dfa7e27a2ac7ba84b779800a32933 100755 --- a/dataset/wider_face/download.sh +++ b/dataset/wider_face/download.sh @@ -16,6 +16,6 @@ wget https://dataset.bj.bcebos.com/wider_face/WIDER_val.zip wget https://dataset.bj.bcebos.com/wider_face/wider_face_split.zip # Extract the data. 
echo "Extracting..." -unzip WIDER_train.zip -unzip WIDER_val.zip -unzip wider_face_split.zip +unzip -q WIDER_train.zip +unzip -q WIDER_val.zip +unzip -q wider_face_split.zip diff --git a/docs/featured_model/FACE_DETECTION.md b/docs/featured_model/FACE_DETECTION.md index d43f075dbbc3b9545004f9738c8dc90426245c7b..4a141857c87f83cc9abe69f4f50820f07622e333 100644 --- a/docs/featured_model/FACE_DETECTION.md +++ b/docs/featured_model/FACE_DETECTION.md @@ -8,6 +8,7 @@ - [数据准备](#数据准备) - [训练与推理](#训练与推理) - [评估](#评估) +- [人脸关键点检测](#人脸关键点检测) - [算法细节](#算法细节) - [如何贡献代码](#如何贡献代码) @@ -142,7 +143,7 @@ cd dataset/wider_face && ./download.sh 训练流程与推理流程方法与其他算法一致,请参考[GETTING_STARTED_cn.md](../tutorials/GETTING_STARTED_cn.md)。 **注意:** - `BlazeFace`和`FaceBoxes`训练是以每卡`batch_size=8`在4卡GPU上进行训练(总`batch_size`是32),并且训练320000轮 -(如果你的GPU数达不到4,请参考[学习率计算规则表](../tutorials/GETTING_STARTED_cn.html#faq))。 +(如果你的GPU数达不到4,请参考[学习率计算规则表](../FAQ.md))。 - 人脸检测模型目前我们不支持边训练边评估。 @@ -241,6 +242,20 @@ cd dataset/fddb/evaluation (2)`OUTPUT_DIR`是FDDB评估输出结果文件前缀,会生成两个文件`{OUTPUT_DIR}ContROC.txt`、`{OUTPUT_DIR}DiscROC.txt`; (3)参数用法及注释可通过执行`./evaluate --help`来获取。 + +## 人脸关键点检测 + +(1)下载PaddleDetection开放的WIDER-FACE数据集人脸关键点标注文件([链接](https://dataset.bj.bcebos.com/wider_face/wider_face_train_bbx_lmk_gt.txt)),并拷贝至`wider_face/wider_face_split`文件夹中: + +```shell +cd dataset/wider_face/wider_face_split/ +wget https://dataset.bj.bcebos.com/wider_face/wider_face_train_bbx_lmk_gt.txt +``` + +(2)使用`configs/face_detection/blazeface_keypoint.yml`配置文件进行训练与评估,使用方法与上一节内容一致。 + +![](../images/12_Group_Group_12_Group_Group_12_84.jpg) + ## 算法细节 ### BlazeFace @@ -257,7 +272,7 @@ cd dataset/fddb/evaluation - 原始版本: 参考原始论文复现; - Lite版本: 使用3x3卷积替换5x5卷积,更少的网络层数和通道数; - NAS版本: 使用神经网络搜索算法构建网络结构,相比于`Lite`版本,NAS版本需要更少的网络层数和通道数。 -- NAS_V2版本1: 基于PaddleSlim中SANAS算法在blazeface-NAS的基础上搜索出来的结构,相比`NAS`版本,NAS_V2版本的精度平均高出3个点,在855芯片上的硬件延时相对`NAS`版本仅增加5%。 +- NAS_V2版本: 基于PaddleSlim中SANAS算法在blazeface-NAS的基础上搜索出来的结构,相比`NAS`版本,NAS_V2版本的精度平均高出3个点,在855芯片上的硬件延时相对`NAS`版本仅增加5%。 ### FaceBoxes **简介:** diff --git a/docs/featured_model/FACE_DETECTION_en.md b/docs/featured_model/FACE_DETECTION_en.md index 2f9579fc27b62cd8f8898ce6425514426228c710..3bcc4a89e9e9a3f2d6d3a8c8fbc73114166970e5 100644 --- a/docs/featured_model/FACE_DETECTION_en.md +++ b/docs/featured_model/FACE_DETECTION_en.md @@ -8,6 +8,7 @@ English | [简体中文](FACE_DETECTION.md) - [Data Pipline](#Data-Pipline) - [Training and Inference](#Training-and-Inference) - [Evaluation](#Evaluation) +- [Face key-point detection](#Face-key-point-detection) - [Algorithm Description](#Algorithm-Description) - [Contributing](#Contributing) @@ -155,7 +156,7 @@ Please refer to [READER.md](../advanced_tutorials/READER.md) for details. **NOTES:** - `BlazeFace` and `FaceBoxes` is trained in 4 GPU with `batch_size=8` per gpu (total batch size as 32) and trained 320000 iters.(If your GPU count is not 4, please refer to the rule of training parameters -in the table of [calculation rules](../tutorials/GETTING_STARTED.html#faq)). +in the table of [calculation rules](../FAQ.md)). - Currently we do not support evaluation in training. ### Evaluation @@ -258,6 +259,20 @@ cd dataset/fddb/evaluation which will generate two files `{OUTPUT_DIR}ContROC.txt`、`{OUTPUT_DIR}DiscROC.txt`; (3)The interpretation of the argument can be performed by `./evaluate --help`. 
+## Face key-point detection + +(1)Download the face key-point annotation file for the WIDER FACE dataset ([Link](https://dataset.bj.bcebos.com/wider_face/wider_face_train_bbx_lmk_gt.txt)) and copy it to the `wider_face/wider_face_split` folder: + +```shell +cd dataset/wider_face/wider_face_split/ +wget https://dataset.bj.bcebos.com/wider_face/wider_face_train_bbx_lmk_gt.txt +``` + +(2)Use the `configs/face_detection/blazeface_keypoint.yml` configuration file for training and evaluation; the usage is the same as in the previous section. + +![](../images/12_Group_Group_12_Group_Group_12_84.jpg) + + ## Algorithm Description ### BlazeFace diff --git a/docs/images/12_Group_Group_12_Group_Group_12_84.jpg b/docs/images/12_Group_Group_12_Group_Group_12_84.jpg new file mode 100644 index 0000000000000000000000000000000000000000..da1795dbafb386303e85d50ea7708a8a2f6f6a3f Binary files /dev/null and b/docs/images/12_Group_Group_12_Group_Group_12_84.jpg differ diff --git a/ppdet/data/source/widerface.py b/ppdet/data/source/widerface.py index 311430559e3706d6f175fc4a08fc40fb96bd37dc..7aab1533705ce3e853eb6c9cc1c20dbd9d8e064c 100644 --- a/ppdet/data/source/widerface.py +++ b/ppdet/data/source/widerface.py @@ -41,7 +41,8 @@ class WIDERFaceDataSet(DataSet): image_dir=None, anno_path=None, sample_num=-1, - with_background=True): + with_background=True, + with_lmk=False): super(WIDERFaceDataSet, self).__init__( image_dir=image_dir, anno_path=anno_path, @@ -53,6 +54,7 @@ class WIDERFaceDataSet(DataSet): self.with_background = with_background self.roidbs = None self.cname2cid = None + self.with_lmk = with_lmk def load_roidb_and_cname2cid(self): anno_path = os.path.join(self.dataset_dir, self.anno_path) @@ -62,33 +64,23 @@ class WIDERFaceDataSet(DataSet): records = [] ct = 0 - file_lists = _load_file_list(txt_file) + file_lists = self._load_file_list(txt_file) cname2cid = widerface_label(self.with_background) for item in file_lists: im_fname = item[0] im_id = np.array([ct]) - gt_bbox = np.zeros((len(item) - 2, 4), dtype=np.float32) - gt_class = np.ones((len(item) - 2, 1), dtype=np.int32) + gt_bbox = np.zeros((len(item) - 1, 4), dtype=np.float32) + gt_class = np.ones((len(item) - 1, 1), dtype=np.int32) + gt_lmk_labels = np.zeros((len(item) - 1, 10), dtype=np.float32) + lmk_ignore_flag = np.zeros((len(item) - 1, 1), dtype=np.int32) for index_box in range(len(item)): - if index_box >= 2: - temp_info_box = item[index_box].split(' ') - xmin = float(temp_info_box[0]) - ymin = float(temp_info_box[1]) - w = float(temp_info_box[2]) - h = float(temp_info_box[3]) - # Filter out wrong labels - if w < 0 or h < 0: - logger.warn('Illegal box with w: {}, h: {} in ' - 'img: {}, and it will be ignored'.format( - w, h, im_fname)) - continue - xmin = max(0, xmin) - ymin = max(0, ymin) - xmax = xmin + w - ymax = ymin + h - gt_bbox[index_box - 2] = [xmin, ymin, xmax, ymax] - + if index_box < 1: + continue + gt_bbox[index_box - 1] = item[index_box][0] + if self.with_lmk: + gt_lmk_labels[index_box - 1] = item[index_box][1] + lmk_ignore_flag[index_box - 1] = item[index_box][2] im_fname = os.path.join(image_dir, im_fname) if image_dir else im_fname widerface_rec = { @@ -97,7 +89,10 @@ class WIDERFaceDataSet(DataSet): 'gt_bbox': gt_bbox, 'gt_class': gt_class, } - # logger.debug + if self.with_lmk: + widerface_rec['gt_keypoint'] = gt_lmk_labels + widerface_rec['keypoint_ignore'] = lmk_ignore_flag + if len(item) != 0: records.append(widerface_rec) @@ -108,34 +103,64 @@ class WIDERFaceDataSet(DataSet): logger.debug('{} samples in file {}'.format(ct,
anno_path)) self.roidbs, self.cname2cid = records, cname2cid - -def _load_file_list(input_txt): - with open(input_txt, 'r') as f_dir: - lines_input_txt = f_dir.readlines() - - file_dict = {} - num_class = 0 - for i in range(len(lines_input_txt)): - line_txt = lines_input_txt[i].strip('\n\t\r') - if '.jpg' in line_txt: - if i != 0: - num_class += 1 - file_dict[num_class] = [] - file_dict[num_class].append(line_txt) - if '.jpg' not in line_txt: - if len(line_txt) > 6: - split_str = line_txt.split(' ') - x1_min = float(split_str[0]) - y1_min = float(split_str[1]) - x2_max = float(split_str[2]) - y2_max = float(split_str[3]) - line_txt = str(x1_min) + ' ' + str(y1_min) + ' ' + str( - x2_max) + ' ' + str(y2_max) + def _load_file_list(self, input_txt): + with open(input_txt, 'r') as f_dir: + lines_input_txt = f_dir.readlines() + + file_dict = {} + num_class = 0 + for i in range(len(lines_input_txt)): + line_txt = lines_input_txt[i].strip('\n\t\r') + if '.jpg' in line_txt: + if i != 0: + num_class += 1 + file_dict[num_class] = [] file_dict[num_class].append(line_txt) - else: - file_dict[num_class].append(line_txt) - - return list(file_dict.values()) + if '.jpg' not in line_txt: + if len(line_txt) <= 6: + continue + result_boxs = [] + split_str = line_txt.split(' ') + xmin = float(split_str[0]) + ymin = float(split_str[1]) + w = float(split_str[2]) + h = float(split_str[3]) + # Filter out wrong labels + if w < 0 or h < 0: + logger.warn('Illegal box with w: {}, h: {} in ' + 'img: {}, and it will be ignored'.format( + w, h, file_dict[num_class][0])) + continue + xmin = max(0, xmin) + ymin = max(0, ymin) + xmax = xmin + w + ymax = ymin + h + gt_bbox = [xmin, ymin, xmax, ymax] + result_boxs.append(gt_bbox) + if self.with_lmk: + assert len(split_str) > 18, 'When `with_lmk=True`, the number of ' \ 'fields per line in the annotation file should ' \ 'exceed 18.'
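+                        # The five landmark (x, y) pairs are read from fields 5-18 below, with one field skipped after each pair (assumed to be a per-point flag in this annotation format); lmk0_x == -1 marks a face without landmark labels, so lmk_ignore_flag is set to 0 and later zeroes that face's landmark loss weight.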
+ lmk0_x = float(split_str[5]) + lmk0_y = float(split_str[6]) + lmk1_x = float(split_str[8]) + lmk1_y = float(split_str[9]) + lmk2_x = float(split_str[11]) + lmk2_y = float(split_str[12]) + lmk3_x = float(split_str[14]) + lmk3_y = float(split_str[15]) + lmk4_x = float(split_str[17]) + lmk4_y = float(split_str[18]) + lmk_ignore_flag = 0 if lmk0_x == -1 else 1 + gt_lmk_label = [ + lmk0_x, lmk0_y, lmk1_x, lmk1_y, lmk2_x, lmk2_y, lmk3_x, + lmk3_y, lmk4_x, lmk4_y + ] + result_boxs.append(gt_lmk_label) + result_boxs.append(lmk_ignore_flag) + file_dict[num_class].append(result_boxs) + + return list(file_dict.values()) def widerface_label(with_background=True): diff --git a/ppdet/data/transform/op_helper.py b/ppdet/data/transform/op_helper.py index d41efd9341a8717577b63e56c67b8e64d69d3393..a9d19b96dadeaa80c77f3f08e3db042823b5a86a 100644 --- a/ppdet/data/transform/op_helper.py +++ b/ppdet/data/transform/op_helper.py @@ -61,10 +61,13 @@ def is_overlap(object_bbox, sample_bbox): return True -def filter_and_process(sample_bbox, bboxes, labels, scores=None): +def filter_and_process(sample_bbox, bboxes, labels, scores=None, + keypoints=None): new_bboxes = [] new_labels = [] new_scores = [] + new_keypoints = [] + new_kp_ignore = [] for i in range(len(bboxes)): new_bbox = [0, 0, 0, 0] obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]] @@ -84,9 +87,24 @@ def filter_and_process(sample_bbox, bboxes, labels, scores=None): new_labels.append([labels[i][0]]) if scores is not None: new_scores.append([scores[i][0]]) + if keypoints is not None: + sample_keypoint = keypoints[0][i] + for j in range(len(sample_keypoint)): + kp_len = sample_height if j % 2 else sample_width + sample_coord = sample_bbox[1] if j % 2 else sample_bbox[0] + sample_keypoint[j] = ( + sample_keypoint[j] - sample_coord) / kp_len + sample_keypoint[j] = max(min(sample_keypoint[j], 1.0), 0.0) + new_keypoints.append(sample_keypoint) + new_kp_ignore.append(keypoints[1][i]) + bboxes = np.array(new_bboxes) labels = np.array(new_labels) scores = np.array(new_scores) + if keypoints is not None: + keypoints = np.array(new_keypoints) + new_kp_ignore = np.array(new_kp_ignore) + return bboxes, labels, scores, (keypoints, new_kp_ignore) return bboxes, labels, scores diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py index f04feedd374b4eec58c9fd852587ef08ed9d1954..79ed2b6bc984710cba1765bfeb4da92681a18fc3 100644 --- a/ppdet/data/transform/operators.py +++ b/ppdet/data/transform/operators.py @@ -32,9 +32,10 @@ import logging import random import math import numpy as np +import os import cv2 -from PIL import Image, ImageEnhance +from PIL import Image, ImageEnhance, ImageDraw from ppdet.core.workspace import serializable from ppdet.modeling.ops import AnchorGrid @@ -400,6 +401,16 @@ class RandomFlipImage(BaseOperator): flipped_segms.append(_flip_rle(segm, height, width)) return flipped_segms + def flip_keypoint(self, gt_keypoint, width): + for i in range(gt_keypoint.shape[1]): + if i % 2 == 0: + old_x = gt_keypoint[:, i].copy() + if self.is_normalized: + gt_keypoint[:, i] = 1 - old_x + else: + gt_keypoint[:, i] = width - old_x - 1 + return gt_keypoint + def __call__(self, sample, context=None): """Filp the image and bounding box. 
Operators: @@ -447,6 +458,9 @@ class RandomFlipImage(BaseOperator): if self.is_mask_flip and len(sample['gt_poly']) != 0: sample['gt_poly'] = self.flip_segms(sample['gt_poly'], height, width) + if 'gt_keypoint' in sample.keys(): + sample['gt_keypoint'] = self.flip_keypoint( + sample['gt_keypoint'], width) sample['flipped'] = True sample['image'] = im sample = samples if batch_input else samples[0] @@ -741,8 +755,17 @@ class ExpandImage(BaseOperator): im = Image.fromarray(im) expand_im.paste(im, (int(w_off), int(h_off))) expand_im = np.asarray(expand_im) - gt_bbox, gt_class, _ = filter_and_process(expand_bbox, gt_bbox, - gt_class) + if 'gt_keypoint' in sample.keys( + ) and 'keypoint_ignore' in sample.keys(): + keypoints = (sample['gt_keypoint'], + sample['keypoint_ignore']) + gt_bbox, gt_class, _, gt_keypoints = filter_and_process( + expand_bbox, gt_bbox, gt_class, keypoints=keypoints) + sample['gt_keypoint'] = gt_keypoints[0] + sample['keypoint_ignore'] = gt_keypoints[1] + else: + gt_bbox, gt_class, _ = filter_and_process(expand_bbox, + gt_bbox, gt_class) sample['image'] = expand_im sample['gt_bbox'] = gt_bbox sample['gt_class'] = gt_class @@ -816,7 +839,7 @@ class CropImage(BaseOperator): sample_bbox = sampled_bbox.pop(idx) sample_bbox = clip_bbox(sample_bbox) crop_bbox, crop_class, crop_score = \ - filter_and_process(sample_bbox, gt_bbox, gt_class, gt_score) + filter_and_process(sample_bbox, gt_bbox, gt_class, scores=gt_score) if self.avoid_no_bbox: if len(crop_bbox) < 1: continue @@ -919,8 +942,16 @@ class CropImageWithDataAchorSampling(BaseOperator): idx = int(np.random.uniform(0, len(sampled_bbox))) sample_bbox = sampled_bbox.pop(idx) - crop_bbox, crop_class, crop_score = filter_and_process( - sample_bbox, gt_bbox, gt_class, gt_score) + if 'gt_keypoint' in sample.keys(): + keypoints = (sample['gt_keypoint'], + sample['keypoint_ignore']) + crop_bbox, crop_class, crop_score, gt_keypoints = \ + filter_and_process(sample_bbox, gt_bbox, gt_class, + scores=gt_score, + keypoints=keypoints) + else: + crop_bbox, crop_class, crop_score = filter_and_process( + sample_bbox, gt_bbox, gt_class, scores=gt_score) crop_bbox, crop_class, crop_score = bbox_area_sampling( crop_bbox, crop_class, crop_score, self.target_size, self.min_size) @@ -934,6 +965,9 @@ class CropImageWithDataAchorSampling(BaseOperator): sample['gt_bbox'] = crop_bbox sample['gt_class'] = crop_class sample['gt_score'] = crop_score + if 'gt_keypoint' in sample.keys(): + sample['gt_keypoint'] = gt_keypoints[0] + sample['keypoint_ignore'] = gt_keypoints[1] return sample return sample @@ -955,8 +989,16 @@ class CropImageWithDataAchorSampling(BaseOperator): sample_bbox = sampled_bbox.pop(idx) sample_bbox = clip_bbox(sample_bbox) - crop_bbox, crop_class, crop_score = filter_and_process( - sample_bbox, gt_bbox, gt_class, gt_score) + if 'gt_keypoint' in sample.keys(): + keypoints = (sample['gt_keypoint'], + sample['keypoint_ignore']) + crop_bbox, crop_class, crop_score, gt_keypoints = \ + filter_and_process(sample_bbox, gt_bbox, gt_class, + scores=gt_score, + keypoints=keypoints) + else: + crop_bbox, crop_class, crop_score = filter_and_process( + sample_bbox, gt_bbox, gt_class, scores=gt_score) # sampling bbox according the bbox area crop_bbox, crop_class, crop_score = bbox_area_sampling( crop_bbox, crop_class, crop_score, self.target_size, @@ -974,6 +1016,9 @@ class CropImageWithDataAchorSampling(BaseOperator): sample['gt_bbox'] = crop_bbox sample['gt_class'] = crop_class sample['gt_score'] = crop_score + if 'gt_keypoint' in sample.keys(): 
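+                # gt_keypoints is the (coords, ignore_flags) pair returned by filter_and_process for this sampled crop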
+ sample['gt_keypoint'] = gt_keypoints[0] + sample['keypoint_ignore'] = gt_keypoints[1] return sample return sample @@ -995,6 +1040,17 @@ class NormalizeBox(BaseOperator): gt_bbox[i][2] = gt_bbox[i][2] / width gt_bbox[i][3] = gt_bbox[i][3] / height sample['gt_bbox'] = gt_bbox + + if 'gt_keypoint' in sample.keys(): + gt_keypoint = sample['gt_keypoint'] + + for i in range(gt_keypoint.shape[1]): + if i % 2: + gt_keypoint[:, i] = gt_keypoint[:, i] / height + else: + gt_keypoint[:, i] = gt_keypoint[:, i] / width + sample['gt_keypoint'] = gt_keypoint + return sample @@ -2270,3 +2325,69 @@ class TargetAssign(BaseOperator): targets[matched_indices] = matched_targets sample['fg_num'] = np.array(len(matched_targets), dtype=np.int32) return sample + + +@register_op +class DebugVisibleImage(BaseOperator): + """ + In debug mode, visualize images according to `gt_bbox`. + (Currently only supported when the image is not cropped or flipped.) + """ + + def __init__(self, output_dir='output/debug', is_normalized=False): + super(DebugVisibleImage, self).__init__() + self.is_normalized = is_normalized + self.output_dir = output_dir + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + if not isinstance(self.is_normalized, bool): + raise TypeError("{}: input type is invalid.".format(self)) + + def __call__(self, sample, context=None): + image = Image.open(sample['im_file']).convert('RGB') + out_file_name = sample['im_file'].split('/')[-1] + width = sample['w'] + height = sample['h'] + gt_bbox = sample['gt_bbox'] + gt_class = sample['gt_class'] + draw = ImageDraw.Draw(image) + for i in range(gt_bbox.shape[0]): + if self.is_normalized: + gt_bbox[i][0] = gt_bbox[i][0] * width + gt_bbox[i][1] = gt_bbox[i][1] * height + gt_bbox[i][2] = gt_bbox[i][2] * width + gt_bbox[i][3] = gt_bbox[i][3] * height + + xmin, ymin, xmax, ymax = gt_bbox[i] + draw.line( + [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), + (xmin, ymin)], + width=2, + fill='green') + # draw label + text = str(gt_class[i][0]) + tw, th = draw.textsize(text) + draw.rectangle( + [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green') + draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255)) + + if 'gt_keypoint' in sample.keys(): + gt_keypoint = sample['gt_keypoint'] + if self.is_normalized: + for i in range(gt_keypoint.shape[1]): + if i % 2: + gt_keypoint[:, i] = gt_keypoint[:, i] * height + else: + gt_keypoint[:, i] = gt_keypoint[:, i] * width + for i in range(gt_keypoint.shape[0]): + keypoint = gt_keypoint[i] + for j in range(int(keypoint.shape[0] / 2)): + x1 = round(keypoint[2 * j]).astype(np.int32) + y1 = round(keypoint[2 * j + 1]).astype(np.int32) + draw.ellipse( + (x1, y1, x1 + 5, y1 + 5), + fill='green', + outline='green') + save_path = os.path.join(self.output_dir, out_file_name) + image.save(save_path, quality=95) + return sample diff --git a/ppdet/modeling/architectures/blazeface.py b/ppdet/modeling/architectures/blazeface.py index 62230cc5f316b0a33fcfbcd720a02eed1f907eaa..d740d07c09d6c5379a336a2360571982af5bf3a3 100644 --- a/ppdet/modeling/architectures/blazeface.py +++ b/ppdet/modeling/architectures/blazeface.py @@ -25,6 +25,7 @@ from paddle.fluid.regularizer import L2Decay from ppdet.core.workspace import register from ppdet.modeling.ops import SSDOutputDecoder +from ppdet.modeling.losses import SSDWithLmkLoss __all__ = ['BlazeFace'] @@ -59,24 +60,29 @@
class BlazeFace(object): steps=[8., 16.], num_classes=2, use_density_prior_box=False, - densities=[[2, 2], [2, 1, 1, 1, 1, 1]]): + densities=[[2, 2], [2, 1, 1, 1, 1, 1]], + with_lmk=False, + lmk_loss=SSDWithLmkLoss().__dict__): super(BlazeFace, self).__init__() self.backbone = backbone self.num_classes = num_classes + self.with_lmk = with_lmk self.output_decoder = output_decoder if isinstance(output_decoder, dict): + if self.with_lmk: + output_decoder['return_index'] = True self.output_decoder = SSDOutputDecoder(**output_decoder) self.min_sizes = min_sizes self.max_sizes = max_sizes self.steps = steps self.use_density_prior_box = use_density_prior_box self.densities = densities + self.landmark = None + if self.with_lmk and isinstance(lmk_loss, dict): + self.lmk_loss = SSDWithLmkLoss(**lmk_loss) def build(self, feed_vars, mode='train'): im = feed_vars['image'] - if mode == 'train': - gt_bbox = feed_vars['gt_bbox'] - gt_class = feed_vars['gt_class'] body_feats = self.backbone(im) locs, confs, box, box_var = self._multi_box_head( @@ -86,20 +92,40 @@ class BlazeFace(object): use_density_prior_box=self.use_density_prior_box) if mode == 'train': - loss = fluid.layers.ssd_loss( - locs, - confs, - gt_bbox, - gt_class, - box, - box_var, - overlap_threshold=0.35, - neg_overlap=0.35) + gt_bbox = feed_vars['gt_bbox'] + gt_class = feed_vars['gt_class'] + if self.with_lmk: + lmk_labels = feed_vars['gt_keypoint'] + lmk_ignore_flag = feed_vars["keypoint_ignore"] + loss = self.lmk_loss(locs, confs, gt_bbox, gt_class, + self.landmark, lmk_labels, lmk_ignore_flag, + box, box_var) + else: + loss = fluid.layers.ssd_loss( + locs, + confs, + gt_bbox, + gt_class, + box, + box_var, + overlap_threshold=0.35, + neg_overlap=0.35) + loss = fluid.layers.reduce_sum(loss) return {'loss': loss} else: - pred = self.output_decoder(locs, confs, box, box_var) - return {'bbox': pred} + if self.with_lmk: + pred, face_index = self.output_decoder(locs, confs, box, + box_var) + return { + 'bbox': pred, + 'face_index': face_index, + 'prior_boxes': box, + 'landmark': self.landmark + } + else: + pred = self.output_decoder(locs, confs, box, box_var) + return {'bbox': pred} def _multi_box_head(self, inputs, @@ -111,11 +137,9 @@ class BlazeFace(object): compile_shape = [0, -1, last_dim] return fluid.layers.reshape(trans, shape=compile_shape) - def _is_list_or_tuple_(data): - return (isinstance(data, list) or isinstance(data, tuple)) - locs, confs = [], [] boxes, vars = [], [] + lmk_locs = [] b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.)) for i, input in enumerate(inputs): @@ -157,7 +181,21 @@ class BlazeFace(object): # get conf mbox_conf = fluid.layers.conv2d( input, num_conf_output, 3, 1, 1, bias_attr=b_attr) - conf = permute_and_reshape(mbox_conf, 2) + conf = permute_and_reshape(mbox_conf, num_classes) + + if self.with_lmk: + # get landmark + lmk_loc_output = num_boxes * 10 + lmk_box_loc = fluid.layers.conv2d( + input, + lmk_loc_output, + 3, + 1, + 1, + param_attr=ParamAttr(name='lmk' + str(i) + '_weights'), + bias_attr=False) + lmk_loc = permute_and_reshape(lmk_box_loc, 10) + lmk_locs.append(lmk_loc) locs.append(loc) confs.append(conf) @@ -168,6 +206,8 @@ class BlazeFace(object): face_mbox_conf = fluid.layers.concat(confs, axis=1) prior_boxes = fluid.layers.concat(boxes) box_vars = fluid.layers.concat(vars) + if self.with_lmk: + self.landmark = fluid.layers.concat(lmk_locs, axis=1) return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars def _inputs_def(self, image_shape): @@ -179,6 +219,8 @@ class 
BlazeFace(object): 'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1}, 'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1}, 'im_shape': {'shape': [None, 3], 'dtype': 'int32', 'lod_level': 0}, + 'gt_keypoint': {'shape': [None, 10], 'dtype': 'float32', 'lod_level': 1}, + 'keypoint_ignore': {'shape': [None, 1], 'dtype': 'float32', 'lod_level': 1}, } # yapf: enable return inputs_def diff --git a/ppdet/modeling/losses/__init__.py b/ppdet/modeling/losses/__init__.py index d1b4dcca245cd56a42d0add8681b6895f6559e3d..f4a1f65b23b05f7b68d3b31377223ad72decb7cb 100644 --- a/ppdet/modeling/losses/__init__.py +++ b/ppdet/modeling/losses/__init__.py @@ -23,6 +23,7 @@ from . import balanced_l1_loss from . import fcos_loss from . import diou_loss_yolo from . import iou_aware_loss +from . import ssd_with_lmk_loss from .iou_aware_loss import * from .yolo_loss import * @@ -33,3 +34,4 @@ from .iou_loss import * from .balanced_l1_loss import * from .fcos_loss import * from .diou_loss_yolo import * +from .ssd_with_lmk_loss import * \ No newline at end of file diff --git a/ppdet/modeling/losses/ssd_with_lmk_loss.py b/ppdet/modeling/losses/ssd_with_lmk_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..8cedee2d4ea32432046c8f0bbcb5c2b3a221da2b --- /dev/null +++ b/ppdet/modeling/losses/ssd_with_lmk_loss.py @@ -0,0 +1,241 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as fluid +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.framework import Variable +import paddle.fluid.layers as layers +from paddle.fluid.layers import (tensor, iou_similarity, bipartite_match, + target_assign, box_coder) +from ppdet.core.workspace import register, serializable + +__all__ = ['SSDWithLmkLoss'] + + +@register +@serializable +class SSDWithLmkLoss(object): + """ + ssd_with_lmk_loss function. + Args: + background_label (int): The index of background label, 0 by default. + overlap_threshold (float): If match_type is `per_prediction`, + use `overlap_threshold` to determine the extra matching bboxes + when finding matched boxes. 0.5 by default. + neg_pos_ratio (float): The ratio of the negative boxes to the positive + boxes, used only when mining_type is `max_negative`, 3.0 by default. + neg_overlap (float): The negative overlap upper bound for the unmatched + predictions. Use only when mining_type is `max_negative`, 0.5 by default. + loc_loss_weight (float): Weight for localization loss, 1.0 by default. + conf_loss_weight (float): Weight for confidence loss, 1.0 by default. + match_type (str): The type of matching method during training, should be + `bipartite` or `per_prediction`, `per_prediction` by default. + normalize (bool): Whether to normalize the loss by the total number of + output locations, True by default. 
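+        Note: the landmark regression term is added into the overall loss with a fixed weight of 0.4 (see the loss composition at the end of `__call__`).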
+ """ + + def __init__(self, + background_label=0, + overlap_threshold=0.5, + neg_pos_ratio=3.0, + neg_overlap=0.5, + loc_loss_weight=1.0, + conf_loss_weight=1.0, + match_type='per_prediction', + normalize=True): + super(SSDWithLmkLoss, self).__init__() + self.background_label = background_label + self.overlap_threshold = overlap_threshold + self.neg_pos_ratio = neg_pos_ratio + self.neg_overlap = neg_overlap + self.loc_loss_weight = loc_loss_weight + self.conf_loss_weight = conf_loss_weight + self.match_type = match_type + self.normalize = normalize + + def __call__(self, + location, + confidence, + gt_box, + gt_label, + landmark_predict, + lmk_label, + lmk_ignore_flag, + prior_box, + prior_box_var=None): + def _reshape_to_2d(var): + return layers.flatten(x=var, axis=2) + + helper = LayerHelper('ssd_loss') #, **locals()) + # Only support mining_type == 'max_negative' now. + mining_type = 'max_negative' + # The max `sample_size` of negative box, used only + # when mining_type is `hard_example`. + sample_size = None + num, num_prior, num_class = confidence.shape + conf_shape = layers.shape(confidence) + + # 1. Find matched boundding box by prior box. + # 1.1 Compute IOU similarity between ground-truth boxes and prior boxes. + iou = iou_similarity(x=gt_box, y=prior_box) + # 1.2 Compute matched boundding box by bipartite matching algorithm. + matched_indices, matched_dist = bipartite_match(iou, self.match_type, + self.overlap_threshold) + + # 2. Compute confidence for mining hard examples + # 2.1. Get the target label based on matched indices + gt_label = layers.reshape( + x=gt_label, shape=(len(gt_label.shape) - 1) * (0, ) + (-1, 1)) + gt_label.stop_gradient = True + target_label, _ = target_assign( + gt_label, matched_indices, mismatch_value=self.background_label) + # 2.2. Compute confidence loss. + # Reshape confidence to 2D tensor. + confidence = _reshape_to_2d(confidence) + target_label = tensor.cast(x=target_label, dtype='int64') + target_label = _reshape_to_2d(target_label) + target_label.stop_gradient = True + conf_loss = layers.softmax_with_cross_entropy(confidence, target_label) + # 3. Mining hard examples + actual_shape = layers.slice(conf_shape, axes=[0], starts=[0], ends=[2]) + actual_shape.stop_gradient = True + conf_loss = layers.reshape( + x=conf_loss, shape=(-1, 0), actual_shape=actual_shape) + conf_loss.stop_gradient = True + neg_indices = helper.create_variable_for_type_inference(dtype='int32') + updated_matched_indices = helper.create_variable_for_type_inference( + dtype=matched_indices.dtype) + helper.append_op( + type='mine_hard_examples', + inputs={ + 'ClsLoss': conf_loss, + 'LocLoss': None, + 'MatchIndices': matched_indices, + 'MatchDist': matched_dist, + }, + outputs={ + 'NegIndices': neg_indices, + 'UpdatedMatchIndices': updated_matched_indices + }, + attrs={ + 'neg_pos_ratio': self.neg_pos_ratio, + 'neg_dist_threshold': self.neg_overlap, + 'mining_type': mining_type, + 'sample_size': sample_size, + }) + + # 4. Assign classification and regression targets + # 4.1. Encoded bbox according to the prior boxes. + encoded_bbox = box_coder( + prior_box=prior_box, + prior_box_var=prior_box_var, + target_box=gt_box, + code_type='encode_center_size') + # 4.2. Assign regression targets + target_bbox, target_loc_weight = target_assign( + encoded_bbox, + updated_matched_indices, + mismatch_value=self.background_label) + # 4.3. 
Assign classification targets + target_label, target_conf_weight = target_assign( + gt_label, + updated_matched_indices, + negative_indices=neg_indices, + mismatch_value=self.background_label) + + target_loc_weight = target_loc_weight * target_label + encoded_lmk_label = self.decode_lmk(lmk_label, prior_box, prior_box_var) + + target_lmk, target_lmk_weight = target_assign( + encoded_lmk_label, + updated_matched_indices, + mismatch_value=self.background_label) + lmk_ignore_flag = layers.reshape( + x=lmk_ignore_flag, + shape=(len(lmk_ignore_flag.shape) - 1) * (0, ) + (-1, 1)) + target_ignore, nouse = target_assign( + lmk_ignore_flag, + updated_matched_indices, + mismatch_value=self.background_label) + + target_lmk_weight = target_lmk_weight * target_ignore + landmark_predict = _reshape_to_2d(landmark_predict) + target_lmk = _reshape_to_2d(target_lmk) + target_lmk_weight = _reshape_to_2d(target_lmk_weight) + lmk_loss = layers.smooth_l1(landmark_predict, target_lmk) + lmk_loss = lmk_loss * target_lmk_weight + target_lmk.stop_gradient = True + target_lmk_weight.stop_gradient = True + target_ignore.stop_gradient = True + nouse.stop_gradient = True + + # 5. Compute loss. + # 5.1 Compute confidence loss. + target_label = _reshape_to_2d(target_label) + target_label = tensor.cast(x=target_label, dtype='int64') + + conf_loss = layers.softmax_with_cross_entropy(confidence, target_label) + target_conf_weight = _reshape_to_2d(target_conf_weight) + conf_loss = conf_loss * target_conf_weight + + # the target_label and target_conf_weight do not have gradient. + target_label.stop_gradient = True + target_conf_weight.stop_gradient = True + + # 5.2 Compute regression loss. + location = _reshape_to_2d(location) + target_bbox = _reshape_to_2d(target_bbox) + + loc_loss = layers.smooth_l1(location, target_bbox) + target_loc_weight = _reshape_to_2d(target_loc_weight) + loc_loss = loc_loss * target_loc_weight + + # the target_bbox and target_loc_weight do not have gradient. + target_bbox.stop_gradient = True + target_loc_weight.stop_gradient = True + + # 5.3 Compute overall weighted loss. + loss = self.conf_loss_weight * conf_loss + self.loc_loss_weight * loc_loss + 0.4 * lmk_loss + # reshape to [N, Np], N is the batch size and Np is the prior box number. 
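+        # `loss` is 2-D ([N * Np, 1]) at this point; actual_shape (the dynamic [N, Np] sliced from shape(confidence) above) restores the batch dimension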
+ loss = layers.reshape(x=loss, shape=(-1, 0), actual_shape=actual_shape) + loss = layers.reduce_sum(loss, dim=1, keep_dim=True) + if self.normalize: + normalizer = layers.reduce_sum(target_loc_weight) + 1 + loss = loss / normalizer + + return loss + + def decode_lmk(self, lmk_label, prior_box, prior_box_var): + label0, label1, label2, label3, label4 = fluid.layers.split( + lmk_label, num_or_sections=5, dim=1) + lmk_labels_list = [label0, label1, label2, label3, label4] + encoded_lmk_list = [] + for label in lmk_labels_list: + concat_label = fluid.layers.concat([label, label], axis=1) + encoded_label = box_coder( + prior_box=prior_box, + prior_box_var=prior_box_var, + target_box=concat_label, + code_type='encode_center_size') + encoded_lmk_label, _ = fluid.layers.split( + encoded_label, num_or_sections=2, dim=2) + encoded_lmk_list.append(encoded_lmk_label) + + encoded_lmk_concat = fluid.layers.concat( + [ + encoded_lmk_list[0], encoded_lmk_list[1], encoded_lmk_list[2], + encoded_lmk_list[3], encoded_lmk_list[4] + ], + axis=2) + return encoded_lmk_concat diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index 972fcdf16399422d247a056f364f8ee0a4db3904..9f288b42f255eae9c0aca9a438da8793e85efafa 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -1478,7 +1478,8 @@ class SSDOutputDecoder(object): keep_top_k=200, score_threshold=0.01, nms_eta=1.0, - background_label=0): + background_label=0, + return_index=False): super(SSDOutputDecoder, self).__init__() self.nms_threshold = nms_threshold self.background_label = background_label @@ -1486,6 +1487,7 @@ class SSDOutputDecoder(object): self.keep_top_k = keep_top_k self.score_threshold = score_threshold self.nms_eta = nms_eta + self.return_index = return_index @register diff --git a/ppdet/utils/checkpoint.py b/ppdet/utils/checkpoint.py index 42fe8194d33450c9f8983ff6e541f14baf7ec392..9461be8a38269ab87f21f40e5f87cacc8cf86cdb 100644 --- a/ppdet/utils/checkpoint.py +++ b/ppdet/utils/checkpoint.py @@ -115,7 +115,8 @@ def load_params(exe, prog, path, ignore_params=[]): path = _get_weight_path(path) path = _strip_postfix(path) - if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')): + if not (os.path.isdir(path) or os.path.isfile(path) or + os.path.exists(path + '.pdparams')): raise ValueError("Model pretrain path {} does not " "exists.".format(path)) diff --git a/ppdet/utils/visualizer.py b/ppdet/utils/visualizer.py index 0658c8c355db67aa54c4e461b5eeb40506d668bc..0af8601e570b28184210e2c5466c5fa24bdba818 100644 --- a/ppdet/utils/visualizer.py +++ b/ppdet/utils/visualizer.py @@ -30,7 +30,8 @@ def visualize_results(image, catid2name, threshold=0.5, bbox_results=None, - mask_results=None): + mask_results=None, + lmk_results=None): """ Visualize bbox and mask results """ @@ -38,6 +39,8 @@ def visualize_results(image, image = draw_mask(image, im_id, mask_results, threshold) if bbox_results: image = draw_bbox(image, im_id, catid2name, bbox_results, threshold) + if lmk_results: + image = draw_lmk(image, im_id, lmk_results, threshold) return image @@ -106,3 +109,21 @@ def draw_bbox(image, im_id, catid2name, bboxes, threshold): draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255)) return image + + +def draw_lmk(image, im_id, lmk_results, threshold): + draw = ImageDraw.Draw(image) + catid2color = {} + color_list = colormap(rgb=True)[:40] + for dt in np.array(lmk_results): + lmk_decode, score = dt['landmark'], dt['score'] + if im_id != dt['image_id']: + continue + if score < threshold: + continue + for j in range(5): + x1 = 
int(round(lmk_decode[2 * j])) + y1 = int(round(lmk_decode[2 * j + 1])) + draw.ellipse( + (x1, y1, x1 + 5, y1 + 5), fill='green', outline='green') + return image diff --git a/ppdet/utils/widerface_eval_utils.py b/ppdet/utils/widerface_eval_utils.py index 7f35c2431076cd654114539b290663e9dccbd950..e7447e8fee0e3fc107351748c9e11c5c277462ad 100644 --- a/ppdet/utils/widerface_eval_utils.py +++ b/ppdet/utils/widerface_eval_utils.py @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) __all__ = [ 'get_shrink', 'bbox_vote', 'save_widerface_bboxes', 'save_fddb_bboxes', - 'to_chw_bgr', 'bbox2out', 'get_category_info' + 'to_chw_bgr', 'bbox2out', 'get_category_info', 'lmk2out' ] @@ -227,3 +227,58 @@ def widerfaceall_category_info(with_background=True): catid2name = {i: name for i, name in enumerate(cats)} return clsid2catid, catid2name + + +def lmk2out(results, is_bbox_normalized=False): + """ + Args: + results: a list of result dicts, each of which should include + `bbox`, `landmark`, `face_index`, `prior_boxes` and `im_id`; + when is_bbox_normalized=True, `im_shape` is also needed. + is_bbox_normalized: whether or not the landmark coordinates are normalized. + """ + xywh_res = [] + for t in results: + bboxes = t['bbox'][0] + lengths = t['bbox'][1][0] + im_ids = np.array(t['im_id'][0]).flatten() + if bboxes is None or bboxes.shape == (1, 1): + continue + face_index = t['face_index'][0] + prior_box = t['prior_boxes'][0] + predict_lmk = t['landmark'][0] + prior = np.reshape(prior_box, (-1, 4)) + predictlmk = np.reshape(predict_lmk, (-1, 10)) + + k = 0 + for a in range(len(lengths)): + num = lengths[a] + im_id = int(im_ids[a]) + for i in range(num): + score = bboxes[k][1] + theindex = face_index[i][0] + me_prior = prior[theindex, :] + lmk_pred = predictlmk[theindex, :] + prior_w = me_prior[2] - me_prior[0] + prior_h = me_prior[3] - me_prior[1] + prior_w_center = (me_prior[2] + me_prior[0]) / 2 + prior_h_center = (me_prior[3] + me_prior[1]) / 2 + lmk_decode = np.zeros((10)) + for j in [0, 2, 4, 6, 8]: + lmk_decode[j] = lmk_pred[j] * 0.1 * prior_w + prior_w_center + for j in [1, 3, 5, 7, 9]: + lmk_decode[j] = lmk_pred[j] * 0.1 * prior_h + prior_h_center + im_shape = t['im_shape'][0][a].tolist() + image_h, image_w = int(im_shape[0]), int(im_shape[1]) + if is_bbox_normalized: + lmk_decode = lmk_decode * np.array([ + image_w, image_h, image_w, image_h, image_w, image_h, + image_w, image_h, image_w, image_h + ]) + lmk_res = { + 'image_id': im_id, + 'landmark': lmk_decode, + 'score': score, + } + xywh_res.append(lmk_res) + k += 1 + return xywh_res diff --git a/tools/infer.py b/tools/infer.py index e4ee0a7cc2a63809ee6cfd8c073eba66d502366d..f7783d20b5e892aea5ac7fabbe16707597548184 100644 --- a/tools/infer.py +++ b/tools/infer.py @@ -144,7 +144,7 @@ def main(): if cfg.metric == "VOC": from ppdet.utils.voc_eval import bbox2out, get_category_info if cfg.metric == "WIDERFACE": - from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info + from ppdet.utils.widerface_eval_utils import bbox2out, lmk2out, get_category_info anno_file = dataset.get_anno() with_background = dataset.with_background @@ -181,11 +181,14 @@ def main(): bbox_results = None mask_results = None + lmk_results = None if 'bbox' in res: bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized) if 'mask' in res: mask_results = mask2out([res], clsid2catid, model.mask_head.resolution) + if 'landmark' in res: + lmk_results = lmk2out([res], is_bbox_normalized) # visualize result im_ids = res['im_id'][0] @@ -203,7 +206,7 @@ def main(): image = visualize_results(image, int(im_id), catid2name, FLAGS.draw_threshold, bbox_results,
bbox_results, - mask_results) + mask_results, lmk_results) # use VisualDL to log image with bbox if FLAGS.use_vdl: @@ -253,4 +256,4 @@ if __name__ == '__main__': default="vdl_log_dir/image", help='VisualDL logging directory for image.') FLAGS = parser.parse_args() - main() + main() \ No newline at end of file