diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py index 7b26eab37cca96315665215e0804fbe659967102..a2c76ec4d681725f8609f79e0c7713bc11058d7a 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py @@ -14,7 +14,10 @@ import numpy as np import paddle.fluid as fluid import paddlehub as hub from paddlehub.module.module import moduleinfo, runnable, serving -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor + +from paddle.inference import Config +from paddle.inference import create_predictor + from paddlehub.io.parser import txt_parser from paddlehub.common.paddle_helper import add_vars_prefix @@ -31,45 +34,65 @@ from faster_rcnn_resnet50_coco2017.roi_extractor import RoIAlign name="faster_rcnn_resnet50_coco2017", version="1.1.1", type="cv/object_detection", - summary= - "Baidu's Faster R-CNN model for object detection with backbone ResNet50, trained with dataset COCO2017", + summary="Baidu's Faster R-CNN model for object detection with backbone ResNet50, trained with dataset COCO2017", author="paddlepaddle", author_email="paddle-dev@baidu.com") class FasterRCNNResNet50(hub.Module): def _initialize(self): # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333] - self.default_pretrained_model_path = os.path.join( - self.directory, "faster_rcnn_resnet50_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) + self.default_pretrained_model_path = os.path.join(self.directory, "faster_rcnn_resnet50_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self._set_config() + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + + # create default cpu predictor + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + self.cpu_predictor = create_predictor(cpu_config) + + # create predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.default_pretrained_model_path) + npu_config.disable_glog_info() + npu_config.enable_npu(device_id=npu_id) + self.npu_predictor = create_predictor(npu_config) + + # gpu + gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") + if gpu_id != -1: + # use gpu + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, - num_classes=81, - trainable=True, - pretrained=True, - phase='train'): + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=gpu_id) + self.gpu_predictor = create_predictor(gpu_config) + + # xpu + xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") + if xpu_id != -1: + # use xpu + xpu_config = Config(self.default_pretrained_model_path) + xpu_config.disable_glog_info() + xpu_config.enable_xpu(100) + self.xpu_predictor = create_predictor(xpu_config) + + def context(self, num_classes=81, trainable=True, pretrained=True, phase='train'): """ Distill the Head Features, so as to perform transfer learning. @@ -88,34 +111,24 @@ class FasterRCNNResNet50(hub.Module): startup_program = fluid.Program() with fluid.program_guard(context_prog, startup_program): with fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=[-1, 3, -1, -1], dtype='float32') + image = fluid.layers.data(name='image', shape=[-1, 3, -1, -1], dtype='float32') # backbone - backbone = ResNet( - norm_type='affine_channel', - depth=50, - feature_maps=4, - freeze_at=2) + backbone = ResNet(norm_type='affine_channel', depth=50, feature_maps=4, freeze_at=2) body_feats = backbone(image) # var_prefix var_prefix = '@HUB_{}@'.format(self.name) - im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32', lod_level=0) - im_shape = fluid.layers.data( - name='im_shape', shape=[3], dtype='float32', lod_level=0) + im_info = fluid.layers.data(name='im_info', shape=[3], dtype='float32', lod_level=0) + im_shape = fluid.layers.data(name='im_shape', shape=[3], dtype='float32', lod_level=0) body_feat_names = list(body_feats.keys()) # rpn_head: RPNHead rpn_head = self.rpn_head() rois = rpn_head.get_proposals(body_feats, im_info, mode=phase) # train if phase == 'train': - gt_bbox = fluid.layers.data( - name='gt_bbox', shape=[4], dtype='float32', lod_level=1) - is_crowd = fluid.layers.data( - name='is_crowd', shape=[1], dtype='int32', lod_level=1) - gt_class = fluid.layers.data( - name='gt_class', shape=[1], dtype='int32', lod_level=1) + gt_bbox = fluid.layers.data(name='gt_bbox', shape=[4], dtype='float32', lod_level=1) + is_crowd = fluid.layers.data(name='is_crowd', shape=[1], dtype='int32', lod_level=1) + gt_class = fluid.layers.data(name='gt_class', shape=[1], dtype='int32', lod_level=1) rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd) # bbox_assigner: BBoxAssigner bbox_assigner = self.bbox_assigner(num_classes) @@ -160,18 +173,13 @@ class FasterRCNNResNet50(hub.Module): 'is_crowd': var_prefix + is_crowd.name } outputs = { - 'head_features': - var_prefix + head_feat.name, - 'rpn_cls_loss': - var_prefix + rpn_loss['rpn_cls_loss'].name, - 'rpn_reg_loss': - var_prefix + rpn_loss['rpn_reg_loss'].name, - 'generate_proposal_labels': - [var_prefix + var.name for var in outs] + 'head_features': var_prefix + head_feat.name, + 'rpn_cls_loss': var_prefix + rpn_loss['rpn_cls_loss'].name, + 'rpn_reg_loss': var_prefix + rpn_loss['rpn_reg_loss'].name, + 'generate_proposal_labels': [var_prefix + var.name for var in outs] } elif phase == 'predict': - pred = bbox_head.get_prediction(roi_feat, rois, im_info, - im_shape) + pred = bbox_head.get_prediction(roi_feat, rois, im_info, im_shape) inputs = { 'image': var_prefix + image.name, 'im_info': var_prefix + im_info.name, @@ -186,13 +194,9 @@ class FasterRCNNResNet50(hub.Module): add_vars_prefix(startup_program, var_prefix) global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } + inputs = {key: global_vars[value] for key, value in inputs.items()} outputs = { - key: global_vars[value] if not isinstance(value, list) else - [global_vars[var] for var in value] + key: global_vars[value] if not isinstance(value, list) else [global_vars[var] for var in value] for key, value in outputs.items() } @@ -208,14 +212,9 @@ class FasterRCNNResNet50(hub.Module): if num_classes != 81: if 'bbox_pred' in var.name or 'cls_score' in var.name: return False - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) return inputs, outputs, context_prog def rpn_head(self): @@ -231,16 +230,8 @@ class FasterRCNNResNet50(hub.Module): rpn_negative_overlap=0.3, rpn_positive_overlap=0.7, rpn_straddle_thresh=0.0), - train_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=12000, - pre_nms_top_n=2000), - test_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=6000, - pre_nms_top_n=1000)) + train_proposal=GenerateProposals(min_size=0.0, nms_thresh=0.7, post_nms_top_n=12000, pre_nms_top_n=2000), + test_proposal=GenerateProposals(min_size=0.0, nms_thresh=0.7, post_nms_top_n=6000, pre_nms_top_n=1000)) def roi_extractor(self): return RoIAlign(resolution=14, sampling_ratio=0, spatial_scale=0.0625) @@ -248,8 +239,7 @@ class FasterRCNNResNet50(hub.Module): def bbox_head(self, num_classes): return BBoxHead( head=ResNetC5(depth=50, norm_type='affine_channel'), - nms=MultiClassNMS( - keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), + nms=MultiClassNMS(keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), bbox_loss=SmoothL1Loss(), num_classes=num_classes) @@ -263,11 +253,7 @@ class FasterRCNNResNet50(hub.Module): fg_thresh=0.5, class_nums=num_classes) - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename @@ -294,7 +280,8 @@ class FasterRCNNResNet50(hub.Module): batch_size=1, output_dir='detection_result', score_thresh=0.5, - visualization=True): + visualization=True, + use_device=None): """API of Object Detection. Args: @@ -305,6 +292,7 @@ class FasterRCNNResNet50(hub.Module): output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): threshold for object detecion. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: @@ -317,14 +305,25 @@ class FasterRCNNResNet50(hub.Module): confidence (float): The confidence of detection result. save_path (str, optional): The path to save output images. """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly." - ) + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor + paths = paths if paths else list() if data and 'image' in data: paths += data['image'] @@ -345,22 +344,30 @@ class FasterRCNNResNet50(hub.Module): except: pass - padding_image, padding_info, padding_shape = padding_minibatch( - batch_data) - padding_image_tensor = PaddleTensor(padding_image.copy()) - padding_info_tensor = PaddleTensor(padding_info.copy()) - padding_shape_tensor = PaddleTensor(padding_shape.copy()) - feed_list = [ - padding_image_tensor, padding_info_tensor, padding_shape_tensor - ] - if use_gpu: - data_out = self.gpu_predictor.run(feed_list) - else: - data_out = self.cpu_predictor.run(feed_list) + padding_image, padding_info, padding_shape = padding_minibatch(batch_data) + + input_names = predictor.get_input_names() + + padding_image_tensor = predictor.get_input_handle(input_names[0]) + padding_image_tensor.reshape(padding_image.shape) + padding_image_tensor.copy_from_cpu(padding_image.copy()) + + padding_info_tensor = predictor.get_input_handle(input_names[1]) + padding_info_tensor.reshape(padding_info.shape) + padding_info_tensor.copy_from_cpu(padding_info.copy()) + + padding_shape_tensor = predictor.get_input_handle(input_names[2]) + padding_shape_tensor.reshape(padding_shape.shape) + padding_shape_tensor.copy_from_cpu(padding_shape.copy()) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output = postprocess( paths=paths, images=images, - data_out=data_out, + data_out=output_handle, score_thresh=score_thresh, label_names=self.label_names, output_dir=output_dir, @@ -374,29 +381,21 @@ class FasterRCNNResNet50(hub.Module): Add the command config options """ self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction") self.arg_config_group.add_argument( - '--batch_size', - type=int, - default=1, - help="batch size for prediction") + '--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.") def add_module_input_arg(self): """ Add the command input options """ - self.arg_input_group.add_argument( - '--input_path', type=str, default=None, help="input data") + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="input data") - self.arg_input_group.add_argument( - '--input_file', - type=str, - default=None, - help="file contain input data") + self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file contain input data") def check_input_data(self, args): input_data = [] @@ -425,12 +424,9 @@ class FasterRCNNResNet50(hub.Module): prog="hub run {}".format(self.name), usage='%(prog)s', add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") + title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() @@ -442,7 +438,6 @@ class FasterRCNNResNet50(hub.Module): else: for image_path in input_data: if not os.path.exists(image_path): - raise RuntimeError( - "File %s or %s is not exist." % image_path) + raise RuntimeError("File %s or %s is not exist." % image_path) return self.object_detection( - paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) + paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size, use_device=args.use_device) diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py index 2b3e1ce9c6fd4ae85dd69a43f2a359192a92bac5..1dc3d2f3096e3950c40dd1768c9ab0807ebc9b0d 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py @@ -19,6 +19,7 @@ def base64_to_cv2(b64str): data = cv2.imdecode(data, cv2.IMREAD_COLOR) return data + def check_dir(dir_path): if not os.path.exists(dir_path): os.makedirs(dir_path) @@ -26,6 +27,7 @@ def check_dir(dir_path): os.remove(dir_path) os.makedirs(dir_path) + def get_save_image_name(img, output_dir, image_path): """Get save image name from source image path. """ @@ -54,23 +56,17 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir): image = Image.open(image_path) draw = ImageDraw.Draw(image) for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') # draw label if image.mode == 'RGB': text = data['label'] + ": %.2f%%" % (100 * data['confidence']) textsize_width, textsize_height = draw.textsize(text=text) draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) save_name = get_save_image_name(image, save_dir, image_path) @@ -98,14 +94,7 @@ def load_label_info(file_path): return label_names -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ postprocess the lod_tensor produced by fluid.Executor.run @@ -130,9 +119,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + results = data_out.copy_to_cpu() + lod = data_out.lod()[0] check_dir(output_dir) @@ -162,9 +150,7 @@ def postprocess(paths, org_img = org_img.astype(np.uint8) org_img = Image.fromarray(org_img[:, :, ::-1]) if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) org_img.save(org_img_path) org_img_height = org_img.height org_img_width = org_img.width @@ -180,13 +166,11 @@ def postprocess(paths, dt = {} dt['label'] = label_names[category_id] dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) output_i['data'].append(dt) output.append(output_i) if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) return output diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py index c732da92fefc4905b618ebdb2334dec68a88c4e4..ec682d9efc75090169a3b58c87aa21c14d87c6cd 100644 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py @@ -10,7 +10,10 @@ import yaml import numpy as np import paddle.fluid as fluid import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor + +from paddle.inference import Config +from paddle.inference import create_predictor + from paddlehub.module.module import moduleinfo, runnable, serving from paddlehub.common.paddle_helper import add_vars_prefix @@ -28,32 +31,59 @@ from ssd_mobilenet_v1_pascal.data_feed import reader author_email="paddle-dev@baidu.com") class SSDMobileNetv1(hub.Module): def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "ssd_mobilenet_v1_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) + self.default_pretrained_model_path = os.path.join(self.directory, "ssd_mobilenet_v1_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self.model_config = None self._set_config() + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): - # predictor config setting. - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + """ + predictor config setting. + """ + + # create default cpu predictor + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + # create predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.default_pretrained_model_path) + npu_config.disable_glog_info() + npu_config.enable_npu(device_id=npu_id) + self.npu_predictor = create_predictor(npu_config) + + # gpu + gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") + if gpu_id != -1: + # use gpu + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=gpu_id) + self.gpu_predictor = create_predictor(gpu_config) + + # xpu + xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") + if xpu_id != -1: + # use xpu + xpu_config = Config(self.default_pretrained_model_path) + xpu_config.disable_glog_info() + xpu_config.enable_xpu(100) + self.xpu_predictor = create_predictor(xpu_config) # model config setting. if not self.model_config: @@ -83,55 +113,34 @@ class SSDMobileNetv1(hub.Module): with fluid.program_guard(context_prog, startup_program): with fluid.unique_name.guard(): # image - image = fluid.layers.data( - name='image', shape=[3, 300, 300], dtype='float32') + image = fluid.layers.data(name='image', shape=[3, 300, 300], dtype='float32') # backbone backbone = MobileNet(**self.mobilenet_config) # body_feats body_feats = backbone(image) # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') # var_prefix var_prefix = '@HUB_{}@'.format(self.name) # names of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} # names of outputs if get_prediction: locs, confs, box, box_var = fluid.layers.multi_box_head( - inputs=body_feats, - image=image, - num_classes=21, - **self.multi_box_head_config) + inputs=body_feats, image=image, num_classes=21, **self.multi_box_head_config) pred = fluid.layers.detection_output( - loc=locs, - scores=confs, - prior_box=box, - prior_box_var=box_var, - **self.output_decoder_config) + loc=locs, scores=confs, prior_box=box, prior_box_var=box_var, **self.output_decoder_config) outputs = {'bbox_out': [var_prefix + pred.name]} else: - outputs = { - 'body_features': - [var_prefix + var.name for var in body_feats] - } + outputs = {'body_features': [var_prefix + var.name for var in body_feats]} # add_vars_prefix add_vars_prefix(context_prog, var_prefix) add_vars_prefix(fluid.default_startup_program(), var_prefix) # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} outputs = { - out_key: [ - context_prog.global_block().vars[varname] - for varname in out_value - ] + out_key: [context_prog.global_block().vars[varname] for varname in out_value] for out_key, out_value in outputs.items() } # trainable @@ -144,14 +153,9 @@ class SSDMobileNetv1(hub.Module): if pretrained: def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) else: exe.run(startup_program) @@ -165,7 +169,8 @@ class SSDMobileNetv1(hub.Module): use_gpu=False, output_dir='detection_result', score_thresh=0.5, - visualization=True): + visualization=True, + use_device=None): """API of Object Detection. Args: @@ -176,6 +181,7 @@ class SSDMobileNetv1(hub.Module): output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): threshold for object detecion. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: @@ -188,14 +194,24 @@ class SSDMobileNetv1(hub.Module): confidence (float): The confidence of detection result. save_path (str, optional): The path to save output images. """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly." - ) + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor paths = paths if paths else list() if data and 'image' in data: @@ -206,16 +222,22 @@ class SSDMobileNetv1(hub.Module): res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy()) - if use_gpu: - data_out = self.gpu_predictor.run([image_tensor]) - else: - data_out = self.cpu_predictor.run([image_tensor]) + + input_names = predictor.get_input_names() + image_data = np.array(list(feed_data[:, 0])) + + image_tensor = predictor.get_input_handle(input_names[0]) + image_tensor.reshape(image_data.shape) + image_tensor.copy_from_cpu(image_data.copy()) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) output = postprocess( paths=paths, images=images, - data_out=data_out, + data_out=output_handle, score_thresh=score_thresh, label_names=self.label_names, output_dir=output_dir, @@ -224,11 +246,7 @@ class SSDMobileNetv1(hub.Module): res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename @@ -266,12 +284,9 @@ class SSDMobileNetv1(hub.Module): prog='hub run {}'.format(self.name), usage='%(prog)s', add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") + title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) @@ -281,7 +296,8 @@ class SSDMobileNetv1(hub.Module): use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization, - score_thresh=args.score_thresh) + score_thresh=args.score_thresh, + use_device=args.use_device) return results def add_module_config_arg(self): @@ -289,34 +305,21 @@ class SSDMobileNetv1(hub.Module): Add the command config options. """ self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument( + '--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.") def add_module_input_arg(self): """ Add the command input options. """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.5, - help="threshold for object detecion.") + '--score_thresh', type=ast.literal_eval, default=0.5, help="threshold for object detecion.") diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py index 82b2335f6a9fb9b6af3359d8cca73438b6919c4c..14c5fee3585374f7736372ec76c404ef51fa201c 100644 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py @@ -15,6 +15,7 @@ def base64_to_cv2(b64str): data = cv2.imdecode(data, cv2.IMREAD_COLOR) return data + def check_dir(dir_path): if not os.path.exists(dir_path): os.makedirs(dir_path) @@ -22,6 +23,7 @@ def check_dir(dir_path): os.remove(dir_path) os.makedirs(dir_path) + def get_save_image_name(img, output_dir, image_path): """ Get save image name from source image path. @@ -50,23 +52,17 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir): image = Image.open(image_path) draw = ImageDraw.Draw(image) for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') # draw label if image.mode == 'RGB': text = data['label'] + ": %.2f%%" % (100 * data['confidence']) textsize_width, textsize_height = draw.textsize(text=text) draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) save_name = get_save_image_name(image, save_dir, image_path) @@ -95,14 +91,7 @@ def load_label_info(file_path): return label_names -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ postprocess the lod_tensor produced by fluid.Executor.run @@ -127,9 +116,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + results = data_out.copy_to_cpu() + lod = data_out.lod()[0] check_dir(output_dir) @@ -159,9 +147,7 @@ def postprocess(paths, org_img = org_img.astype(np.uint8) org_img = Image.fromarray(org_img[:, :, ::-1]) if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) org_img.save(org_img_path) org_img_height = org_img.height org_img_width = org_img.width @@ -181,13 +167,11 @@ def postprocess(paths, dt = {} dt['label'] = label_names[category_id] dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) output_i['data'].append(dt) output.append(output_i) if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) return output diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/module.py b/modules/image/object_detection/yolov3_darknet53_coco2017/module.py index 7886ef4e6af81aeb47d4d710af86dcfcc4d5ad03..39ff9bef1097b8578e97342faeea1adcd813d902 100644 --- a/modules/image/object_detection/yolov3_darknet53_coco2017/module.py +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/module.py @@ -9,7 +9,10 @@ from functools import partial import numpy as np import paddle.fluid as fluid import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor + +from paddle.inference import Config +from paddle.inference import create_predictor + from paddlehub.module.module import moduleinfo, runnable, serving from paddlehub.common.paddle_helper import add_vars_prefix @@ -32,27 +35,54 @@ class YOLOv3DarkNet53Coco2017(hub.Module): self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self._set_config() + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): """ predictor config setting. """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + + # create default cpu predictor + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + # create predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.default_pretrained_model_path) + npu_config.disable_glog_info() + npu_config.enable_npu(device_id=npu_id) + self.npu_predictor = create_predictor(npu_config) + + # gpu + gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") + if gpu_id != -1: + # use gpu + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=gpu_id) + self.gpu_predictor = create_predictor(gpu_config) + + # xpu + xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") + if xpu_id != -1: + # use xpu + xpu_config = Config(self.default_pretrained_model_path) + xpu_config.disable_glog_info() + xpu_config.enable_xpu(100) + self.xpu_predictor = create_predictor(xpu_config) def context(self, trainable=True, pretrained=True, get_prediction=False): """ @@ -135,7 +165,8 @@ class YOLOv3DarkNet53Coco2017(hub.Module): use_gpu=False, output_dir='detection_result', score_thresh=0.5, - visualization=True): + visualization=True, + use_device=None): """API of Object Detection. Args: @@ -146,6 +177,7 @@ class YOLOv3DarkNet53Coco2017(hub.Module): output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): threshold for object detecion. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: @@ -158,14 +190,24 @@ class YOLOv3DarkNet53Coco2017(hub.Module): confidence (float): The confidence of detection result. save_path (str, optional): The path to save output images. """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor paths = paths if paths else list() if data and 'image' in data: @@ -176,17 +218,27 @@ class YOLOv3DarkNet53Coco2017(hub.Module): res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run([image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run([image_tensor, im_size_tensor]) + + input_names = predictor.get_input_names() + image_data = np.array(list(feed_data[:, 0])) + image_size_data = np.array(list(feed_data[:, 1])) + + image_tensor = predictor.get_input_handle(input_names[0]) + image_tensor.reshape(image_data.shape) + image_tensor.copy_from_cpu(image_data.copy()) + + image_size_tensor = predictor.get_input_handle(input_names[1]) + image_size_tensor.reshape(image_size_data.shape) + image_size_tensor.copy_from_cpu(image_size_data.copy()) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) output = postprocess( paths=paths, images=images, - data_out=data_out, + data_out=output_handle, score_thresh=score_thresh, label_names=self.label_names, output_dir=output_dir, @@ -245,7 +297,8 @@ class YOLOv3DarkNet53Coco2017(hub.Module): use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization, - score_thresh=args.score_thresh) + score_thresh=args.score_thresh, + use_device=args.use_device) return results def add_module_config_arg(self): @@ -258,6 +311,10 @@ class YOLOv3DarkNet53Coco2017(hub.Module): '--output_dir', type=str, default='detection_result', help="The directory to save output images.") self.arg_config_group.add_argument( '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument( + '--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.") def add_module_input_arg(self): """ diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py b/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py index 64049e42b1d4dfa67aff606cae4490710b9cd6cc..40325382e59f3897dc60dcb44d41f62b8a29e766 100644 --- a/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py @@ -94,8 +94,6 @@ def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, paths (list[str]): The paths of images. images (list(numpy.ndarray)): images data, shape of each is [H, W, C] data_out (lod_tensor): data output of predictor. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): the low limit of bounding box. @@ -113,9 +111,8 @@ def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + results = data_out.copy_to_cpu() + lod = data_out.lod()[0] check_dir(output_dir) diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py index 590b9efacdc34dc9b9e9a7fe1daa1f9c75f8cc80..a2b8e184b2c90f820c6e8450e83bde92225e6bbb 100644 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py @@ -9,7 +9,10 @@ from functools import partial import numpy as np import paddle.fluid as fluid import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor + +from paddle.inference import Config +from paddle.inference import create_predictor + from paddlehub.module.module import moduleinfo, runnable, serving from paddlehub.common.paddle_helper import add_vars_prefix @@ -23,39 +26,63 @@ from yolov3_darknet53_pedestrian.yolo_head import MultiClassNMS, YOLOv3Head name="yolov3_darknet53_pedestrian", version="1.0.2", type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for pedestrian detection, with backbone DarkNet53.", + summary="Baidu's YOLOv3 model for pedestrian detection, with backbone DarkNet53.", author="paddlepaddle", author_email="paddle-dev@baidu.com") class YOLOv3DarkNet53Pedestrian(hub.Module): def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_darknet53_pedestrian_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_pedestrian_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self._set_config() + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): """ predictor config setting. """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + + # create default cpu predictor + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + # create predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.default_pretrained_model_path) + npu_config.disable_glog_info() + npu_config.enable_npu(device_id=npu_id) + self.npu_predictor = create_predictor(npu_config) + + # gpu + gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") + if gpu_id != -1: + # use gpu + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=gpu_id) + self.gpu_predictor = create_predictor(gpu_config) + + # xpu + xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") + if xpu_id != -1: + # use xpu + xpu_config = Config(self.default_pretrained_model_path) + xpu_config.disable_glog_info() + xpu_config.enable_xpu(100) + self.xpu_predictor = create_predictor(xpu_config) def context(self, trainable=True, pretrained=True, get_prediction=False): """ @@ -76,20 +103,18 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): with fluid.program_guard(context_prog, startup_program): with fluid.unique_name.guard(): # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') + image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') # backbone backbone = DarkNet(norm_type='sync_bn', norm_decay=0., depth=53) # body_feats body_feats = backbone(image) # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') # yolo_head yolo_head = YOLOv3Head( anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], + anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], + [373, 326]], norm_decay=0., num_classes=1, ignore_thresh=0.7, @@ -102,8 +127,7 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): normalized=False, score_threshold=0.01)) # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) + head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -112,35 +136,24 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): # var_prefix var_prefix = '@HUB_{}@'.format(self.name) # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} # name of outputs if get_prediction: bbox_out = yolo_head.get_prediction(head_features, im_size) outputs = {'bbox_out': [var_prefix + bbox_out.name]} else: outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] + 'head_features': [var_prefix + var.name for var in head_features], + 'body_features': [var_prefix + var.name for var in body_features] } # add_vars_prefix add_vars_prefix(context_prog, var_prefix) add_vars_prefix(fluid.default_startup_program(), var_prefix) # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} # outputs outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] + key: [context_prog.global_block().vars[varname] for varname in value] for key, value in outputs.items() } # trainable @@ -150,14 +163,9 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): if pretrained: def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) else: exe.run(startup_program) @@ -170,7 +178,8 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): use_gpu=False, output_dir='yolov3_pedestrian_detect_output', score_thresh=0.2, - visualization=True): + visualization=True, + use_device=None): """API of Object Detection. Args: @@ -181,6 +190,7 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): threshold for object detecion. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: res (list[dict]): The result of pedestrian detecion. keys include 'data', 'save_path', the corresponding value is: @@ -193,14 +203,24 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): confidence (float): The confidence of detection result. save_path (str, optional): The path to save output images. """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly." - ) + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor paths = paths if paths else list() data_reader = partial(reader, paths, images) @@ -208,19 +228,27 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) + + input_names = predictor.get_input_names() + image_data = np.array(list(feed_data[:, 0])) + image_size_data = np.array(list(feed_data[:, 1])) + + image_tensor = predictor.get_input_handle(input_names[0]) + image_tensor.reshape(image_data.shape) + image_tensor.copy_from_cpu(image_data.copy()) + + image_size_tensor = predictor.get_input_handle(input_names[1]) + image_size_tensor.reshape(image_size_data.shape) + image_size_tensor.copy_from_cpu(image_size_data.copy()) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) output = postprocess( paths=paths, images=images, - data_out=data_out, + data_out=output_handle, score_thresh=score_thresh, label_names=self.label_names, output_dir=output_dir, @@ -229,11 +257,7 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename @@ -271,12 +295,9 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): prog='hub run {}'.format(self.name), usage='%(prog)s', add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") + title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) @@ -286,7 +307,8 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization, - score_thresh=args.score_thresh) + score_thresh=args.score_thresh, + use_device=args.use_device) return results def add_module_config_arg(self): @@ -294,34 +316,24 @@ class YOLOv3DarkNet53Pedestrian(hub.Module): Add the command config options. """ self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") self.arg_config_group.add_argument( '--output_dir', type=str, default='yolov3_pedestrian_detect_output', help="The directory to save output images.") self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument( + '--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.") def add_module_input_arg(self): """ Add the command input options. """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.2, - help="threshold for object detecion.") + '--score_thresh', type=ast.literal_eval, default=0.2, help="threshold for object detecion.") diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py index 1039e3e48d66ace28762008ebed3d08da5f702d6..5aa464e6bf950b7c64b271e2673663d689ad1f24 100644 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py @@ -50,21 +50,15 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir): image = Image.open(image_path) draw = ImageDraw.Draw(image) for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') # draw label if image.mode == 'RGB': text = data['label'] + ": %.2f%%" % (100 * data['confidence']) textsize_width, textsize_height = draw.textsize(text=text) draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) save_name = get_save_image_name(image, save_dir, image_path) @@ -92,14 +86,7 @@ def load_label_info(file_path): return label_names -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ postprocess the lod_tensor produced by fluid.Executor.run @@ -107,8 +94,6 @@ def postprocess(paths, paths (list[str]): The paths of images. images (list(numpy.ndarray)): images data, shape of each is [H, W, C] data_out (lod_tensor): data output of predictor. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): the low limit of bounding box. @@ -126,9 +111,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + results = data_out.copy_to_cpu() + lod = data_out.lod()[0] check_dir(output_dir) @@ -146,7 +130,6 @@ def postprocess(paths, else: unhandled_paths_num = 0 - output = list() for index in range(len(lod) - 1): output_i = {'data': []} @@ -158,9 +141,7 @@ def postprocess(paths, org_img = org_img.astype(np.uint8) org_img = Image.fromarray(org_img[:, :, ::-1]) if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) org_img.save(org_img_path) org_img_height = org_img.height org_img_width = org_img.width @@ -176,13 +157,11 @@ def postprocess(paths, dt = {} dt['label'] = label_names[category_id] dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) output_i['data'].append(dt) output.append(output_i) if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) return output diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py index 98e1110a0bba80f6559d3653612cb49754179fb0..1a905df2aa5b700f32143e6c9004b1daaae43d8d 100644 --- a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py @@ -9,7 +9,10 @@ from functools import partial import numpy as np import paddle.fluid as fluid import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor + +from paddle.inference import Config +from paddle.inference import create_predictor + from paddlehub.module.module import moduleinfo, runnable, serving from paddlehub.common.paddle_helper import add_vars_prefix @@ -23,39 +26,63 @@ from yolov3_mobilenet_v1_coco2017.yolo_head import MultiClassNMS, YOLOv3Head name="yolov3_mobilenet_v1_coco2017", version="1.0.2", type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for object detection with backbone MobileNet_V1, trained with dataset COCO2017.", + summary="Baidu's YOLOv3 model for object detection with backbone MobileNet_V1, trained with dataset COCO2017.", author="paddlepaddle", author_email="paddle-dev@baidu.com") class YOLOv3MobileNetV1Coco2017(hub.Module): def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_mobilenet_v1_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_mobilenet_v1_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self._set_config() + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): """ predictor config setting. """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + + # create default cpu predictor + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + # create predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.default_pretrained_model_path) + npu_config.disable_glog_info() + npu_config.enable_npu(device_id=npu_id) + self.npu_predictor = create_predictor(npu_config) + + # gpu + gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") + if gpu_id != -1: + # use gpu + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=gpu_id) + self.gpu_predictor = create_predictor(gpu_config) + + # xpu + xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") + if xpu_id != -1: + # use xpu + xpu_config = Config(self.default_pretrained_model_path) + xpu_config.disable_glog_info() + xpu_config.enable_xpu(100) + self.xpu_predictor = create_predictor(xpu_config) def context(self, trainable=True, pretrained=True, get_prediction=False): """ @@ -76,24 +103,17 @@ class YOLOv3MobileNetV1Coco2017(hub.Module): with fluid.program_guard(context_prog, startup_program): with fluid.unique_name.guard(): # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') + image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') # backbone - backbone = MobileNet( - norm_type='sync_bn', - norm_decay=0., - conv_group_scale=1, - with_extra_blocks=False) + backbone = MobileNet(norm_type='sync_bn', norm_decay=0., conv_group_scale=1, with_extra_blocks=False) # body_feats body_feats = backbone(image) # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') # yolo_head yolo_head = YOLOv3Head(num_classes=80) # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) + head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -102,35 +122,24 @@ class YOLOv3MobileNetV1Coco2017(hub.Module): # var_prefix var_prefix = '@HUB_{}@'.format(self.name) # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} # name of outputs if get_prediction: bbox_out = yolo_head.get_prediction(head_features, im_size) outputs = {'bbox_out': [var_prefix + bbox_out.name]} else: outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] + 'head_features': [var_prefix + var.name for var in head_features], + 'body_features': [var_prefix + var.name for var in body_features] } # add_vars_prefix add_vars_prefix(context_prog, var_prefix) add_vars_prefix(startup_program, var_prefix) # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} # outputs outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] + key: [context_prog.global_block().vars[varname] for varname in value] for key, value in outputs.items() } # trainable @@ -140,14 +149,9 @@ class YOLOv3MobileNetV1Coco2017(hub.Module): if pretrained: def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) else: exe.run(startup_program) @@ -160,7 +164,8 @@ class YOLOv3MobileNetV1Coco2017(hub.Module): use_gpu=False, output_dir='detection_result', score_thresh=0.5, - visualization=True): + visualization=True, + use_device=None): """API of Object Detection. Args: @@ -171,6 +176,7 @@ class YOLOv3MobileNetV1Coco2017(hub.Module): output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): threshold for object detecion. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: @@ -183,14 +189,24 @@ class YOLOv3MobileNetV1Coco2017(hub.Module): confidence (float): The confidence of detection result. save_path (str, optional): The path to save output images. """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly." - ) + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor paths = paths if paths else list() data_reader = partial(reader, paths, images) @@ -198,19 +214,27 @@ class YOLOv3MobileNetV1Coco2017(hub.Module): res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) + + input_names = predictor.get_input_names() + image_data = np.array(list(feed_data[:, 0])) + image_size_data = np.array(list(feed_data[:, 1])) + + image_tensor = predictor.get_input_handle(input_names[0]) + image_tensor.reshape(image_data.shape) + image_tensor.copy_from_cpu(image_data.copy()) + + image_size_tensor = predictor.get_input_handle(input_names[1]) + image_size_tensor.reshape(image_size_data.shape) + image_size_tensor.copy_from_cpu(image_size_data.copy()) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) output = postprocess( paths=paths, images=images, - data_out=data_out, + data_out=output_handle, score_thresh=score_thresh, label_names=self.label_names, output_dir=output_dir, @@ -219,11 +243,7 @@ class YOLOv3MobileNetV1Coco2017(hub.Module): res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename @@ -261,12 +281,9 @@ class YOLOv3MobileNetV1Coco2017(hub.Module): prog='hub run {}'.format(self.name), usage='%(prog)s', add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") + title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) @@ -276,7 +293,8 @@ class YOLOv3MobileNetV1Coco2017(hub.Module): use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization, - score_thresh=args.score_thresh) + score_thresh=args.score_thresh, + use_device=args.use_device) return results def add_module_config_arg(self): @@ -284,34 +302,21 @@ class YOLOv3MobileNetV1Coco2017(hub.Module): Add the command config options. """ self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument( + '--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.") def add_module_input_arg(self): """ Add the command input options. """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.5, - help="threshold for object detecion.") + '--score_thresh', type=ast.literal_eval, default=0.5, help="threshold for object detecion.") diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py index 2f9a42d9c0ce6fc2d819349580d850b908ccfb51..5c874e67a1ebfae64d6599f4004afecf48169ffe 100644 --- a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py @@ -50,21 +50,15 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir): image = Image.open(image_path) draw = ImageDraw.Draw(image) for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') # draw label if image.mode == 'RGB': text = data['label'] + ": %.2f%%" % (100 * data['confidence']) textsize_width, textsize_height = draw.textsize(text=text) draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) save_name = get_save_image_name(image, save_dir, image_path) @@ -92,14 +86,7 @@ def load_label_info(file_path): return label_names -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ postprocess the lod_tensor produced by fluid.Executor.run @@ -108,8 +95,6 @@ def postprocess(paths, images (list(numpy.ndarray)): images data, shape of each is [H, W, C] data_out (lod_tensor): data output of predictor. batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): the low limit of bounding box. label_names (list[str]): label names. @@ -126,9 +111,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + results = data_out.copy_to_cpu() + lod = data_out.lod()[0] check_dir(output_dir) @@ -157,9 +141,7 @@ def postprocess(paths, org_img = org_img.astype(np.uint8) org_img = Image.fromarray(org_img[:, :, ::-1]) if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) org_img.save(org_img_path) org_img_height = org_img.height org_img_width = org_img.width @@ -175,13 +157,11 @@ def postprocess(paths, dt = {} dt['label'] = label_names[category_id] dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) output_i['data'].append(dt) output.append(output_i) if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) return output diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py index cdd037d89255d36cc4a3eb8eb0a4e46ad69b4cfd..ed95185a93eae58951dce27cfa68a73d64fd45d4 100644 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py @@ -9,7 +9,10 @@ from functools import partial import numpy as np import paddle.fluid as fluid import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor + +from paddle.inference import Config +from paddle.inference import create_predictor + from paddlehub.module.module import moduleinfo, runnable, serving from paddlehub.common.paddle_helper import add_vars_prefix @@ -23,39 +26,63 @@ from yolov3_resnet50_vd_coco2017.yolo_head import MultiClassNMS, YOLOv3Head name="yolov3_resnet50_vd_coco2017", version="1.0.2", type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for object detection with backbone ResNet50, trained with dataset coco2017.", + summary="Baidu's YOLOv3 model for object detection with backbone ResNet50, trained with dataset coco2017.", author="paddlepaddle", author_email="paddle-dev@baidu.com") class YOLOv3ResNet50Coco2017(hub.Module): def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_resnet50_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_resnet50_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self._set_config() + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): """ predictor config setting. """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + + # create default cpu predictor + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + # create predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.default_pretrained_model_path) + npu_config.disable_glog_info() + npu_config.enable_npu(device_id=npu_id) + self.npu_predictor = create_predictor(npu_config) + + # gpu + gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") + if gpu_id != -1: + # use gpu + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=gpu_id) + self.gpu_predictor = create_predictor(gpu_config) + + # xpu + xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") + if xpu_id != -1: + # use xpu + xpu_config = Config(self.default_pretrained_model_path) + xpu_config.disable_glog_info() + xpu_config.enable_xpu(100) + self.xpu_predictor = create_predictor(xpu_config) def context(self, trainable=True, pretrained=True, get_prediction=False): """ @@ -76,8 +103,7 @@ class YOLOv3ResNet50Coco2017(hub.Module): with fluid.program_guard(context_prog, startup_program): with fluid.unique_name.guard(): # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') + image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') # backbone backbone = ResNet( norm_type='sync_bn', @@ -91,13 +117,11 @@ class YOLOv3ResNet50Coco2017(hub.Module): # body_feats body_feats = backbone(image) # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') # yolo_head yolo_head = YOLOv3Head(num_classes=80) # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) + head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -106,35 +130,24 @@ class YOLOv3ResNet50Coco2017(hub.Module): # var_prefix var_prefix = '@HUB_{}@'.format(self.name) # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} # name of outputs if get_prediction: bbox_out = yolo_head.get_prediction(head_features, im_size) outputs = {'bbox_out': [var_prefix + bbox_out.name]} else: outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] + 'head_features': [var_prefix + var.name for var in head_features], + 'body_features': [var_prefix + var.name for var in body_features] } # add_vars_prefix add_vars_prefix(context_prog, var_prefix) add_vars_prefix(fluid.default_startup_program(), var_prefix) # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} # outputs outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] + key: [context_prog.global_block().vars[varname] for varname in value] for key, value in outputs.items() } # trainable @@ -144,14 +157,9 @@ class YOLOv3ResNet50Coco2017(hub.Module): if pretrained: def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) else: exe.run(startup_program) @@ -164,7 +172,8 @@ class YOLOv3ResNet50Coco2017(hub.Module): use_gpu=False, output_dir='detection_result', score_thresh=0.5, - visualization=True): + visualization=True, + use_device=None): """API of Object Detection. Args: @@ -175,6 +184,7 @@ class YOLOv3ResNet50Coco2017(hub.Module): output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): threshold for object detecion. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: @@ -187,14 +197,24 @@ class YOLOv3ResNet50Coco2017(hub.Module): confidence (float): The confidence of detection result. save_path (str, optional): The path to save output images. """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly." - ) + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor paths = paths if paths else list() data_reader = partial(reader, paths, images) @@ -202,19 +222,27 @@ class YOLOv3ResNet50Coco2017(hub.Module): res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) + + input_names = predictor.get_input_names() + image_data = np.array(list(feed_data[:, 0])) + image_size_data = np.array(list(feed_data[:, 1])) + + image_tensor = predictor.get_input_handle(input_names[0]) + image_tensor.reshape(image_data.shape) + image_tensor.copy_from_cpu(image_data.copy()) + + image_size_tensor = predictor.get_input_handle(input_names[1]) + image_size_tensor.reshape(image_size_data.shape) + image_size_tensor.copy_from_cpu(image_size_data.copy()) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) output = postprocess( paths=paths, images=images, - data_out=data_out, + data_out=output_handle, score_thresh=score_thresh, label_names=self.label_names, output_dir=output_dir, @@ -223,11 +251,7 @@ class YOLOv3ResNet50Coco2017(hub.Module): res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename @@ -265,12 +289,9 @@ class YOLOv3ResNet50Coco2017(hub.Module): prog='hub run {}'.format(self.name), usage='%(prog)s', add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") + title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) @@ -280,7 +301,8 @@ class YOLOv3ResNet50Coco2017(hub.Module): use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization, - score_thresh=args.score_thresh) + score_thresh=args.score_thresh, + use_device=args.use_device) return results def add_module_config_arg(self): @@ -288,34 +310,21 @@ class YOLOv3ResNet50Coco2017(hub.Module): Add the command config options. """ self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument( + '--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.") def add_module_input_arg(self): """ Add the command input options. """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.5, - help="threshold for object detecion.") + '--score_thresh', type=ast.literal_eval, default=0.5, help="threshold for object detecion.") diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py index 1039e3e48d66ace28762008ebed3d08da5f702d6..5aa464e6bf950b7c64b271e2673663d689ad1f24 100644 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py @@ -50,21 +50,15 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir): image = Image.open(image_path) draw = ImageDraw.Draw(image) for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') # draw label if image.mode == 'RGB': text = data['label'] + ": %.2f%%" % (100 * data['confidence']) textsize_width, textsize_height = draw.textsize(text=text) draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) save_name = get_save_image_name(image, save_dir, image_path) @@ -92,14 +86,7 @@ def load_label_info(file_path): return label_names -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ postprocess the lod_tensor produced by fluid.Executor.run @@ -107,8 +94,6 @@ def postprocess(paths, paths (list[str]): The paths of images. images (list(numpy.ndarray)): images data, shape of each is [H, W, C] data_out (lod_tensor): data output of predictor. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): the low limit of bounding box. @@ -126,9 +111,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + results = data_out.copy_to_cpu() + lod = data_out.lod()[0] check_dir(output_dir) @@ -146,7 +130,6 @@ def postprocess(paths, else: unhandled_paths_num = 0 - output = list() for index in range(len(lod) - 1): output_i = {'data': []} @@ -158,9 +141,7 @@ def postprocess(paths, org_img = org_img.astype(np.uint8) org_img = Image.fromarray(org_img[:, :, ::-1]) if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) org_img.save(org_img_path) org_img_height = org_img.height org_img_width = org_img.width @@ -176,13 +157,11 @@ def postprocess(paths, dt = {} dt['label'] = label_names[category_id] dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) output_i['data'].append(dt) output.append(output_i) if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) return output