diff --git a/deploy/python/README.md b/deploy/python/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b9a8cc35eb325853dba329b0b1c75c44656b016b
--- /dev/null
+++ b/deploy/python/README.md
@@ -0,0 +1,72 @@
+## PaddleDetection Python Deployment for Inference
+This tutorial uses AnalysisPredictor to run high-performance inference on an [exported model](../../docs/advanced_tutorials/inference/EXPORT_MODEL.md).
+
+In PaddlePaddle, the inference engine and the training engine are optimized differently under the hood; the two available prediction modes are listed below. Executor supports both training and inference, while AnalysisPredictor is optimized specifically for inference: it is the Python interface of the [C++ inference library](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/native_infer.html) and applies a series of graph optimizations to the model, reducing unnecessary memory copies. For users with strict performance requirements when deploying a trained model, we provide this inference script, which is independent of PaddleDetection and easy to integrate.
+
+- Executor: [Executor](https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/executor.html#executor)
+- AnalysisPredictor: [AnalysisPredictor](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/python_infer_cn.html#analysispredictor)
+
+
+Deployment involves two steps:
+
+- Export the inference model
+- Run inference with Python
+
+## 1. Export the inference model
+
+During training, PaddleDetection saves both the forward (network) parameters and the optimizer parameters; for deployment only the forward parameters are needed. See [Export model](../../docs/advanced_tutorials/inference/EXPORT_MODEL.md) for details.
+
+The export directory contains three files: `__model__`, `__params__` and `infer_cfg.yml`.
+
+## 2. Inference with Python
+### 2.1 Install dependencies
+ - Install `PaddlePaddle`:
+   Follow the [official installation guide](https://paddlepaddle.org.cn/install/quick) and choose a suitable method; version 1.7 or later is required.
+ - Switch to the root directory of the `PaddleDetection` repository and run `pip install -r requirements.txt` to install the other dependencies.
+
+### 2.2 Run the inference script
+Run the following command in a terminal:
+
+```bash
+python infer.py --model_dir=/path/to/models --image_file=/path/to/image \
+                --use_gpu=(False/True)
+```
+
+The parameters are described below (set exactly one of `--image_file` and `--video_file`):
+
+| Parameter | Required | Description |
+|-------|-------|----------|
+| --model_dir | Yes | path of the exported model directory described above |
+| --image_file | Yes (one of image/video) | path of the image to predict |
+| --video_file | Yes (one of image/video) | path of the video to predict |
+| --use_gpu | No | whether to use GPU, default False |
+| --threshold | No | score threshold for predictions, default 0.5 |
+| --output_dir | No | root directory for saving visualized results, default output/ |
+
+
+## 3. Deployment performance comparison
+Inference speed of AnalysisPredictor compared with Executor.
+
+### 3.1 Test environment
+
+- CUDA 9.0
+- cuDNN 7.5
+- PaddlePaddle 1.7.1
+- GPU: Tesla P40
+
+### 3.2 Test method
+
+- Batch size = 1
+- The first 100 warmup iterations are excluded; the average over the next 100 iterations is reported in ms/image, measuring model execution only (data preprocessing and copies are not included).
+
+
+### 3.3 Test results
+
+| Model | AnalysisPredictor (ms/image) | Executor (ms/image) | Input size |
+|---|----|---|---|
+| YOLOv3-MobileNetV1 | 15.20 | 19.54 | 608x608 |
+| faster_rcnn_r50_fpn_1x | 50.05 | 69.58 | 800x1088 |
+| faster_rcnn_r50_1x | 326.11 | 347.22 | 800x1067 |
+| mask_rcnn_r50_fpn_1x | 67.49 | 91.02 | 800x1088 |
+| mask_rcnn_r50_1x | 326.11 | 350.94 | 800x1067 |
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e7a261f24406ef2f8b64b698bd98a3ba70203ce
--- /dev/null
+++ b/deploy/python/infer.py
@@ -0,0 +1,556 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
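+
+# A minimal usage sketch (paths below are hypothetical examples):
+#   python deploy/python/infer.py --model_dir=/path/to/exported_model \
+#       --image_file=/path/to/demo.jpg --use_gpu=True
+# Or, programmatically, via the Detector class defined below:
+#   detector = Detector('/path/to/exported_model', use_gpu=True)
+#   results = detector.predict('/path/to/demo.jpg', threshold=0.5)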
+
+import os
+import ast
+import argparse
+import yaml
+from PIL import Image
+import cv2
+import numpy as np
+import paddle.fluid as fluid
+from visualize import visualize_box_mask
+
+
+def decode_image(im_file, im_info):
+    """read rgb image
+    Args:
+        im_file (str|np.ndarray): path of image or np.ndarray read by cv2
+        im_info (dict): info of image
+    Returns:
+        im (np.ndarray): processed image (np.ndarray)
+        im_info (dict): info of processed image
+    """
+    if isinstance(im_file, str):
+        with open(im_file, 'rb') as f:
+            im_read = f.read()
+        data = np.frombuffer(im_read, dtype='uint8')
+        im = cv2.imdecode(data, 1)  # decoded in BGR mode, but RGB is needed
+        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+    else:
+        im = im_file
+    im_info['origin_shape'] = im.shape[:2]
+    im_info['resize_shape'] = im.shape[:2]
+    return im, im_info
+
+
+class Resize(object):
+    """resize image by target_size and max_size
+    Args:
+        arch (str): model type
+        target_size (int): the target size of image
+        max_size (int): the max size of image
+        use_cv2 (bool): whether to use cv2 for resizing
+        image_shape (list): input shape of model
+        interp (int): interpolation method of resize
+    """
+
+    def __init__(self,
+                 arch,
+                 target_size,
+                 max_size,
+                 use_cv2=True,
+                 image_shape=None,
+                 interp=cv2.INTER_LINEAR):
+        self.target_size = target_size
+        self.max_size = max_size
+        self.image_shape = image_shape
+        self.arch = arch
+        self.use_cv2 = use_cv2
+        self.interp = interp
+        self.scale_set = {'RCNN', 'RetinaNet'}
+
+    def __call__(self, im, im_info):
+        """
+        Args:
+            im (np.ndarray): image (np.ndarray)
+            im_info (dict): info of image
+        Returns:
+            im (np.ndarray): processed image (np.ndarray)
+            im_info (dict): info of processed image
+        """
+        im_channel = im.shape[2]
+        im_scale_x, im_scale_y = self.generate_scale(im)
+        if self.use_cv2:
+            im = cv2.resize(
+                im,
+                None,
+                None,
+                fx=im_scale_x,
+                fy=im_scale_y,
+                interpolation=self.interp)
+        else:
+            resize_w = int(im_scale_x * float(im.shape[1]))
+            resize_h = int(im_scale_y * float(im.shape[0]))
+            if self.max_size != 0:
+                raise TypeError(
+                    'If you set max_size to cap the maximum size of image, '
+                    'please set use_cv2 to True to resize the image.')
+            im = im.astype('uint8')
+            im = Image.fromarray(im)
+            im = im.resize((int(resize_w), int(resize_h)), self.interp)
+            im = np.array(im)
+
+        # pad the image when image_shape is fixed by infer_cfg.yml
+        if self.max_size != 0 and self.image_shape is not None:
+            padding_im = np.zeros(
+                (self.max_size, self.max_size, im_channel), dtype=np.float32)
+            im_h, im_w = im.shape[:2]
+            padding_im[:im_h, :im_w, :] = im
+            im = padding_im
+
+        if self.arch in self.scale_set:
+            im_info['scale'] = im_scale_x
+        im_info['resize_shape'] = im.shape[:2]
+        return im, im_info
+
+    def generate_scale(self, im):
+        """
+        Args:
+            im (np.ndarray): image (np.ndarray)
+        Returns:
+            im_scale_x: the resize ratio of X
+            im_scale_y: the resize ratio of Y
+        """
+        origin_shape = im.shape[:2]
+        if self.max_size != 0 and self.arch in self.scale_set:
+            im_size_min = np.min(origin_shape[0:2])
+            im_size_max = np.max(origin_shape[0:2])
+            im_scale = float(self.target_size) / float(im_size_min)
+            if np.round(im_scale * im_size_max) > self.max_size:
+                im_scale = float(self.max_size) / float(im_size_max)
+            im_scale_x = im_scale
+            im_scale_y = im_scale
+        else:
+            im_scale_x = float(self.target_size) / float(origin_shape[1])
+            im_scale_y = float(self.target_size) / float(origin_shape[0])
+        return im_scale_x, im_scale_y
+
+
+class Normalize(object):
+    """normalize image
+    Args:
+        mean (list): im - mean
+        std (list): im / std
+        is_scale (bool): whether the image needs to be divided by 255
+        is_channel_first (bool): if True the image shape is CHW, else HWC
+    """
+
+    def __init__(self, mean, std, is_scale=True, is_channel_first=False):
+        self.mean = mean
+        self.std = std
+        self.is_scale = is_scale
+        self.is_channel_first = is_channel_first
+
+    def __call__(self, im, im_info):
+        """
+        Args:
+            im (np.ndarray): image (np.ndarray)
+            im_info (dict): info of image
+        Returns:
+            im (np.ndarray): processed image (np.ndarray)
+            im_info (dict): info of processed image
+        """
+        im = im.astype(np.float32, copy=False)
+        if self.is_channel_first:
+            mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
+            std = np.array(self.std)[:, np.newaxis, np.newaxis]
+        else:
+            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
+            std = np.array(self.std)[np.newaxis, np.newaxis, :]
+        if self.is_scale:
+            im = im / 255.0
+        im -= mean
+        im /= std
+        return im, im_info
+
+
+class Permute(object):
+    """permute image
+    Args:
+        to_bgr (bool): whether to convert RGB to BGR
+        channel_first (bool): whether to convert HWC to CHW
+    """
+
+    def __init__(self, to_bgr=False, channel_first=True):
+        self.to_bgr = to_bgr
+        self.channel_first = channel_first
+
+    def __call__(self, im, im_info):
+        """
+        Args:
+            im (np.ndarray): image (np.ndarray)
+            im_info (dict): info of image
+        Returns:
+            im (np.ndarray): processed image (np.ndarray)
+            im_info (dict): info of processed image
+        """
+        if self.channel_first:
+            im = im.transpose((2, 0, 1)).copy()
+        if self.to_bgr:
+            im = im[[2, 1, 0], :, :]
+        return im, im_info
+
+
+class PadStride(object):
+    """pad image for models with FPN
+    Args:
+        stride (int): models with FPN need image shape % stride == 0
+    """
+
+    def __init__(self, stride=0):
+        self.coarsest_stride = stride
+
+    def __call__(self, im, im_info):
+        """
+        Args:
+            im (np.ndarray): image (np.ndarray)
+            im_info (dict): info of image
+        Returns:
+            im (np.ndarray): processed image (np.ndarray)
+            im_info (dict): info of processed image
+        """
+        coarsest_stride = self.coarsest_stride
+        if coarsest_stride == 0:
+            # return both to keep the operator interface consistent
+            return im, im_info
+        im_c, im_h, im_w = im.shape
+        pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
+        pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
+        padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
+        padding_im[:, :im_h, :im_w] = im
+        im_info['resize_shape'] = padding_im.shape[1:]
+        return padding_im, im_info
+
+
+def create_inputs(im, im_info, model_arch='YOLO'):
+    """generate input for different model types
+    Args:
+        im (np.ndarray): image (np.ndarray)
+        im_info (dict): info of image
+        model_arch (str): model type
+    Returns:
+        inputs (dict): input of model
+    """
+    inputs = {}
+    inputs['image'] = im
+    origin_shape = list(im_info['origin_shape'])
+    resize_shape = list(im_info['resize_shape'])
+    scale = im_info['scale']
+    if 'YOLO' in model_arch:
+        im_size = np.array([origin_shape]).astype('int32')
+        inputs['im_size'] = im_size
+    elif 'RetinaNet' in model_arch:
+        im_info = np.array([resize_shape + [scale]]).astype('float32')
+        inputs['im_info'] = im_info
+    elif 'RCNN' in model_arch:
+        im_info = np.array([resize_shape + [scale]]).astype('float32')
+        im_shape = np.array([origin_shape + [1.]]).astype('float32')
+        inputs['im_info'] = im_info
+        inputs['im_shape'] = im_shape
+    return inputs
+
+
+class Config(object):
+    """set config of preprocess, postprocess and visualize
+    Args:
+        model_dir (str): root path of infer_cfg.yml
+    """
+    support_models = ['YOLO', 'SSD', 'RetinaNet', 'RCNN']
+
+    def __init__(self, model_dir):
+        # parse Yaml config for preprocess
+        deploy_file = os.path.join(model_dir, 'infer_cfg.yml')
+        with open(deploy_file) as f:
+            yml_conf = yaml.safe_load(f)
+        self.check_model(yml_conf)
+        self.arch = yml_conf['arch']
+        self.preprocess_infos = yml_conf['Preprocess']
+        self.use_python_inference = yml_conf['use_python_inference']
+        self.run_mode = yml_conf['mode']
+        self.min_subgraph_size = yml_conf['min_subgraph_size']
+        self.labels = yml_conf['label_list']
+        if not yml_conf['with_background']:
+            self.labels = self.labels[1:]
+        self.mask_resolution = None
+        if 'mask_resolution' in yml_conf:
+            self.mask_resolution = yml_conf['mask_resolution']
+
+    def check_model(self, yml_conf):
+        """
+        Raises:
+            ValueError: loaded model is not in the supported model types
+        """
+        for support_model in self.support_models:
+            if support_model in yml_conf['arch']:
+                return True
+        raise ValueError(
+            "Unsupported arch: {}, expect SSD, YOLO, RetinaNet or RCNN".format(
+                yml_conf['arch']))
+
+
+def load_predictor(model_dir,
+                   run_mode='fluid',
+                   batch_size=1,
+                   use_gpu=False,
+                   min_subgraph_size=3):
+    """set AnalysisConfig, generate AnalysisPredictor
+    Args:
+        model_dir (str): root path of __model__ and __params__
+        use_gpu (bool): whether to use gpu
+    Returns:
+        predictor (PaddlePredictor): AnalysisPredictor
+    Raises:
+        ValueError: predicting with TensorRT requires use_gpu == True
+    """
+    if not use_gpu and run_mode != 'fluid':
+        raise ValueError(
+            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
+            .format(run_mode, use_gpu))
+    precision_map = {
+        'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
+        'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
+        'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
+    }
+    config = fluid.core.AnalysisConfig(
+        os.path.join(model_dir, '__model__'),
+        os.path.join(model_dir, '__params__'))
+    if use_gpu:
+        # initial GPU memory(M), device ID
+        config.enable_use_gpu(100, 0)
+        # optimize graph and fuse ops
+        config.switch_ir_optim(True)
+    else:
+        config.disable_gpu()
+
+    if run_mode in precision_map.keys():
+        config.enable_tensorrt_engine(
+            workspace_size=1 << 30,
+            max_batch_size=batch_size,
+            min_subgraph_size=min_subgraph_size,
+            precision_mode=precision_map[run_mode],
+            use_static=False,
+            use_calib_mode=run_mode == 'trt_int8')
+
+    # disable print log when predicting
+    config.disable_glog_info()
+    # enable shared memory
+    config.enable_memory_optim()
+    # disable feed and fetch OPs, needed by zero_copy_run
+    config.switch_use_feed_fetch_ops(False)
+    predictor = fluid.core.create_paddle_predictor(config)
+    return predictor
+
+
+def load_executor(model_dir, use_gpu=False):
+    if use_gpu:
+        place = fluid.CUDAPlace(0)
+    else:
+        place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    program, feed_names, fetch_targets = fluid.io.load_inference_model(
+        dirname=model_dir,
+        executor=exe,
+        model_filename='__model__',
+        params_filename='__params__')
+    return exe, program, fetch_targets
+
+
+def visualize(image_file,
+              results,
+              labels,
+              mask_resolution=14,
+              output_dir='output/'):
+    # visualize the predicted result
+    im = visualize_box_mask(
+        image_file, results, labels, mask_resolution=mask_resolution)
+    img_name = os.path.split(image_file)[-1]
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    out_path = os.path.join(output_dir, img_name)
+    im.save(out_path, quality=95)
+    print("save result to: " + out_path)
+
+
+class Detector(object):
+    """
+    Args:
+        model_dir (str): root path of __model__, __params__ and infer_cfg.yml
+        use_gpu (bool): whether to use gpu
+    """
+
+    def __init__(self, model_dir, use_gpu=False, threshold=0.5):
+        self.config = Config(model_dir)
+        if self.config.use_python_inference:
+            self.executor, self.program, self.fetch_targets = load_executor(
+                model_dir, use_gpu=use_gpu)
+        else:
+            self.predictor = load_predictor(
+                model_dir,
+                run_mode=self.config.run_mode,
+                min_subgraph_size=self.config.min_subgraph_size,
+                use_gpu=use_gpu)
+        self.preprocess_ops = []
+        for op_info in self.config.preprocess_infos:
+            op_type = op_info.pop('type')
+            if op_type == 'Resize':
+                op_info['arch'] = self.config.arch
+            self.preprocess_ops.append(eval(op_type)(**op_info))
+
+    def preprocess(self, im):
+        # process image by preprocess_ops
+        im_info = {
+            'scale': 1.,
+            'origin_shape': None,
+            'resize_shape': None,
+        }
+        im, im_info = decode_image(im, im_info)
+        for operator in self.preprocess_ops:
+            im, im_info = operator(im, im_info)
+        # add a batch dimension
+        im = np.array((im, )).astype('float32')
+        inputs = create_inputs(im, im_info, self.config.arch)
+        return inputs, im_info
+
+    def postprocess(self, np_boxes, np_masks, im_info, threshold=0.5):
+        # postprocess output of predictor
+        results = {}
+        if 'SSD' in self.config.arch:
+            # SSD outputs normalized coordinates; scale them back to the
+            # origin image size (origin_shape is [height, width])
+            h, w = im_info['origin_shape']
+            np_boxes[:, 2] *= w
+            np_boxes[:, 3] *= h
+            np_boxes[:, 4] *= w
+            np_boxes[:, 5] *= h
+        expect_boxes = np_boxes[:, 1] > threshold
+        np_boxes = np_boxes[expect_boxes, :]
+        for box in np_boxes:
+            print('class_id:{:d}, confidence:{:.2f}, '
+                  'left_top:[{:.2f},{:.2f}], '
+                  'right_bottom:[{:.2f},{:.2f}]'.format(
+                      int(box[0]), box[1], box[2], box[3], box[4], box[5]))
+        results['boxes'] = np_boxes
+        if np_masks is not None:
+            np_masks = np_masks[expect_boxes, :, :, :]
+            results['masks'] = np_masks
+        return results
+
+    def predict(self, image, threshold=0.5):
+        '''
+        Args:
+            image (str|np.ndarray): path of image or np.ndarray read by cv2
+            threshold (float): threshold of predicted box's score
+        Returns:
+            results (dict): 'boxes' is a np.ndarray of shape [N, 6], N: number of boxes,
+                            each row: [class, score, x_min, y_min, x_max, y_max];
+                            MaskRCNN results also contain 'masks': np.ndarray of
+                            shape [N, class_num, mask_resolution, mask_resolution]
+        '''
+        inputs, im_info = self.preprocess(image)
+        np_boxes, np_masks = None, None
+        if self.config.use_python_inference:
+            outs = self.executor.run(self.program,
+                                     feed=inputs,
+                                     fetch_list=self.fetch_targets,
+                                     return_numpy=False)
+            np_boxes = np.array(outs[0])
+            if self.config.mask_resolution is not None:
+                np_masks = np.array(outs[1])
+        else:
+            input_names = self.predictor.get_input_names()
+            for name in input_names:
+                input_tensor = self.predictor.get_input_tensor(name)
+                input_tensor.copy_from_cpu(inputs[name])
+            self.predictor.zero_copy_run()
+            output_names = self.predictor.get_output_names()
+            boxes_tensor = self.predictor.get_output_tensor(output_names[0])
+            np_boxes = boxes_tensor.copy_to_cpu()
+            if self.config.mask_resolution is not None:
+                masks_tensor = self.predictor.get_output_tensor(output_names[1])
+                np_masks = masks_tensor.copy_to_cpu()
+        results = self.postprocess(
+            np_boxes, np_masks, im_info, threshold=threshold)
+        return results
+
+
+def predict_image():
+    detector = Detector(FLAGS.model_dir, use_gpu=FLAGS.use_gpu)
+    results = detector.predict(FLAGS.image_file, FLAGS.threshold)
+    visualize(
+        FLAGS.image_file,
+        results,
+        detector.config.labels,
+        mask_resolution=detector.config.mask_resolution,
+        output_dir=FLAGS.output_dir)
+
+
+def predict_video():
+    detector = Detector(FLAGS.model_dir, use_gpu=FLAGS.use_gpu)
+    capture = cv2.VideoCapture(FLAGS.video_file)
+    # the output video is written at a fixed 30 FPS
+    fps = 30
+    width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    video_name = os.path.split(FLAGS.video_file)[-1]
+    if not os.path.exists(FLAGS.output_dir):
+        os.makedirs(FLAGS.output_dir)
+    out_path = os.path.join(FLAGS.output_dir, video_name)
+    writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
+    index = 1
+    while True:
+        ret, frame = capture.read()
+        if not ret:
+            break
+        print('detect frame:%d' % (index))
+        index += 1
+        results = detector.predict(frame, FLAGS.threshold)
+        im = visualize_box_mask(
+            frame,
+            results,
+            detector.config.labels,
+            mask_resolution=detector.config.mask_resolution)
+        im = np.array(im)
+        writer.write(im)
+    writer.release()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--model_dir",
+        type=str,
+        default=None,
+        help=("Directory including '__model__', '__params__' and "
+              "'infer_cfg.yml', created by export_model."),
+        required=True)
+    parser.add_argument(
+        "--image_file", type=str, default='', help="Path of image file.")
+    parser.add_argument(
+        "--video_file", type=str, default='', help="Path of video file.")
+    parser.add_argument(
+        "--use_gpu",
+        type=ast.literal_eval,
+        default=False,
+        help="Whether to predict with GPU.")
+    parser.add_argument(
+        "--threshold", type=float, default=0.5, help="Threshold of score.")
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        default="output",
+        help="Directory of output visualization files.")
+
+    FLAGS = parser.parse_args()
+    if FLAGS.image_file != '' and FLAGS.video_file != '':
+        assert False, "Cannot predict image and video at the same time."
+    if FLAGS.image_file != '':
+        predict_image()
+    if FLAGS.video_file != '':
+        predict_video()
diff --git a/deploy/python/visualize.py b/deploy/python/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..f58bee315010f03b39674e7df453c0b654244ae7
--- /dev/null
+++ b/deploy/python/visualize.py
@@ -0,0 +1,186 @@
+# coding: utf-8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
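+
+# A minimal usage sketch (assumes `results` comes from Detector.predict in
+# deploy/python/infer.py and `labels` from its Config; paths are hypothetical):
+#   im = visualize_box_mask('demo.jpg', results, labels)
+#   im.save('output/demo_vis.jpg')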
+
+import cv2
+import numpy as np
+from PIL import Image, ImageDraw
+
+
+def visualize_box_mask(im, results, labels, mask_resolution=14):
+    """
+    Args:
+        im (str|np.ndarray): path of image or np.ndarray read by cv2
+        results (dict): 'boxes' is a np.ndarray of shape [N, 6], N: number of boxes,
+                        each row: [class, score, x_min, y_min, x_max, y_max];
+                        MaskRCNN results also contain 'masks': np.ndarray of
+                        shape [N, class_num, mask_resolution, mask_resolution]
+        labels (list): labels: ['class1', ..., 'classn']
+        mask_resolution (int): shape of a mask is [mask_resolution, mask_resolution]
+    Returns:
+        im (PIL.Image.Image): visualized image
+    """
+    if isinstance(im, str):
+        im = Image.open(im).convert('RGB')
+    else:
+        im = Image.fromarray(im)
+    if 'masks' in results and 'boxes' in results:
+        im = draw_mask(
+            im,
+            results['boxes'],
+            results['masks'],
+            labels,
+            resolution=mask_resolution)
+    if 'boxes' in results:
+        im = draw_box(im, results['boxes'], labels)
+    return im
+
+
+def get_color_map_list(num_classes):
+    """
+    Args:
+        num_classes (int): number of classes
+    Returns:
+        color_map (list): RGB color list
+    """
+    color_map = num_classes * [0, 0, 0]
+    for i in range(0, num_classes):
+        j = 0
+        lab = i
+        while lab:
+            color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
+            color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
+            color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
+            j += 1
+            lab >>= 3
+    color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
+    return color_map
+
+
+def expand_boxes(boxes, scale=0.0):
+    """
+    Args:
+        boxes (np.ndarray): shape [N, 4], N: number of boxes,
+                            each row: [x_min, y_min, x_max, y_max]
+        scale (float): scale of boxes
+    Returns:
+        boxes_exp (np.ndarray): expanded boxes
+    """
+    w_half = (boxes[:, 2] - boxes[:, 0]) * .5
+    h_half = (boxes[:, 3] - boxes[:, 1]) * .5
+    x_c = (boxes[:, 2] + boxes[:, 0]) * .5
+    y_c = (boxes[:, 3] + boxes[:, 1]) * .5
+    w_half *= scale
+    h_half *= scale
+    boxes_exp = np.zeros(boxes.shape)
+    boxes_exp[:, 0] = x_c - w_half
+    boxes_exp[:, 2] = x_c + w_half
+    boxes_exp[:, 1] = y_c - h_half
+    boxes_exp[:, 3] = y_c + h_half
+    return boxes_exp
+
+
+def draw_mask(im, np_boxes, np_masks, labels, resolution=14, threshold=0.5):
+    """
+    Args:
+        im (PIL.Image.Image): PIL image
+        np_boxes (np.ndarray): shape [N, 6], N: number of boxes,
+                               each row: [class, score, x_min, y_min, x_max, y_max]
+        np_masks (np.ndarray): shape [N, class_num, resolution, resolution]
+        labels (list): labels: ['class1', ..., 'classn']
+        resolution (int): shape of a mask is [resolution, resolution]
+        threshold (float): threshold of mask
+    Returns:
+        im (PIL.Image.Image): visualized image
+    """
+    color_list = get_color_map_list(len(labels))
+    scale = (resolution + 2.0) / resolution
+    im_w, im_h = im.size
+    w_ratio = 0.4
+    alpha = 0.7
+    im = np.array(im).astype('float32')
+    rects = np_boxes[:, 2:]
+    expand_rects = expand_boxes(rects, scale)
+    expand_rects = expand_rects.astype(np.int32)
+    clsid_scores = np_boxes[:, 0:2]
+    padded_mask = np.zeros((resolution + 2, resolution + 2), dtype=np.float32)
+    clsid2color = {}
+    for idx in range(len(np_boxes)):
+        clsid, score = clsid_scores[idx].tolist()
+        clsid = int(clsid)
+        xmin, ymin, xmax, ymax = expand_rects[idx].tolist()
+        w = xmax - xmin + 1
+        h = ymax - ymin + 1
+        w = np.maximum(w, 1)
+        h = np.maximum(h, 1)
+        padded_mask[1:-1, 1:-1] = np_masks[idx, clsid, :, :]
+        resized_mask = cv2.resize(padded_mask, (w, h))
+        resized_mask = np.array(resized_mask > threshold, dtype=np.uint8)
+        x0 = min(max(xmin, 0), im_w)
+        x1 = min(max(xmax + 1, 0), im_w)
+        y0 = min(max(ymin, 0), im_h)
+        y1 = min(max(ymax + 1, 0), im_h)
+        im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
+        im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), (
+            x0 - xmin):(x1 - xmin)]
+        if clsid not in clsid2color:
+            clsid2color[clsid] = color_list[clsid]
+        color_mask = clsid2color[clsid]
+        for c in range(3):
+            color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio * 255
+        idx = np.nonzero(im_mask)
+        color_mask = np.array(color_mask)
+        im[idx[0], idx[1], :] *= 1.0 - alpha
+        im[idx[0], idx[1], :] += alpha * color_mask
+    return Image.fromarray(im.astype('uint8'))
+
+
+def draw_box(im, np_boxes, labels):
+    """
+    Args:
+        im (PIL.Image.Image): PIL image
+        np_boxes (np.ndarray): shape [N, 6], N: number of boxes,
+                               each row: [class, score, x_min, y_min, x_max, y_max]
+        labels (list): labels: ['class1', ..., 'classn']
+    Returns:
+        im (PIL.Image.Image): visualized image
+    """
+    # line width must be a positive integer for ImageDraw
+    draw_thickness = max(int(min(im.size) / 320), 1)
+    draw = ImageDraw.Draw(im)
+    clsid2color = {}
+    color_list = get_color_map_list(len(labels))
+
+    for dt in np_boxes:
+        clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
+        xmin, ymin, xmax, ymax = bbox
+        w = xmax - xmin
+        h = ymax - ymin
+        if clsid not in clsid2color:
+            clsid2color[clsid] = color_list[clsid]
+        color = tuple(clsid2color[clsid])
+
+        # draw bbox
+        draw.line(
+            [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
+             (xmin, ymin)],
+            width=draw_thickness,
+            fill=color)
+
+        # draw label
+        text = "{} {:.2f}".format(labels[clsid], score)
+        tw, th = draw.textsize(text)
+        draw.rectangle(
+            [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
+        draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
+    return im
diff --git a/requirements.txt b/requirements.txt
index c7d9d8093ad476856dcf643c08b10f8c5f252c30..2e0299ba1e2d4c9d434df4b4afa2974234dad4a5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,6 @@ tb-paddle
 tensorboard >= 1.15
 cython
 pycocotools
+opencv-python
+PyYAML
 shapely