diff --git a/deploy/python/infer.py b/deploy/python/infer.py index 745894d381b64878a77c9342f5da3646bce0a336..ac835ab0a0635bab2e9a684ce9f729e02061d12d 100644 --- a/deploy/python/infer.py +++ b/deploy/python/infer.py @@ -133,7 +133,7 @@ class Detector(object): boxes_tensor = self.predictor.get_output_handle(output_names[0]) np_boxes = boxes_tensor.copy_to_cpu() if self.pred_config.mask_resolution is not None: - masks_tensor = self.predictor.get_output_handle(output_names[1]) + masks_tensor = self.predictor.get_output_handle(output_names[2]) np_masks = masks_tensor.copy_to_cpu() t2 = time.time() ms = (t2 - t1) * 1000.0 / repeats diff --git a/deploy/python/preprocess.py b/deploy/python/preprocess.py index 062e2789399ab11c0a2f0ecbd439905c3c0a567f..1cdb9e3844e2589477beb6155106dfc37fde15e2 100644 --- a/deploy/python/preprocess.py +++ b/deploy/python/preprocess.py @@ -79,7 +79,7 @@ class ResizeOp(object): im_info['scale_factor'] = np.array( [im_scale_y, im_scale_x]).astype('float32') # padding im when image_shape fixed by infer_cfg.yml - if self.keep_ratio: + if self.keep_ratio and im_info['input_shape'][1] is not None: max_size = im_info['input_shape'][1] padding_im = np.zeros( (max_size, max_size, im_channel), dtype=np.float32) diff --git a/ppdet/modeling/architecture/cascade_rcnn.py b/ppdet/modeling/architecture/cascade_rcnn.py index 61b389b02142874222abbee191675582912ecc7b..1a678b6a076d12a3607199aab196d9b013b2eb9b 100644 --- a/ppdet/modeling/architecture/cascade_rcnn.py +++ b/ppdet/modeling/architecture/cascade_rcnn.py @@ -158,17 +158,12 @@ class CascadeRCNN(BaseArch): loss.update({'loss': total_loss}) return loss - def get_pred(self, return_numpy=True): + def get_pred(self): bbox, bbox_num = self.bboxes output = { - 'bbox': bbox.numpy(), - 'bbox_num': bbox_num.numpy(), - 'im_id': self.inputs['im_id'].numpy(), + 'bbox': bbox, + 'bbox_num': bbox_num, } - if self.with_mask: - mask = self.mask_post_process(self.bboxes, self.mask_head_out, - self.inputs['im_shape'], - 
self.inputs['scale_factor']) - output.update(mask) + output.update(self.mask_head_out) return output diff --git a/ppdet/modeling/architecture/faster_rcnn.py b/ppdet/modeling/architecture/faster_rcnn.py index 956b2bd8bf0443f733cbf19e34447729588a2986..5e1c7e610c9275df34a5c59e141e5896a41e77f8 100644 --- a/ppdet/modeling/architecture/faster_rcnn.py +++ b/ppdet/modeling/architecture/faster_rcnn.py @@ -92,12 +92,10 @@ class FasterRCNN(BaseArch): loss.update({'loss': total_loss}) return loss - def get_pred(self, return_numpy=True): + def get_pred(self): bbox, bbox_num = self.bboxes output = { - 'bbox': bbox.numpy(), - 'bbox_num': bbox_num.numpy(), - 'im_id': self.inputs['im_id'].numpy() + 'bbox': bbox, + 'bbox_num': bbox_num, } - return output diff --git a/ppdet/modeling/architecture/mask_rcnn.py b/ppdet/modeling/architecture/mask_rcnn.py index c073499e6333360b74a272398565d52ee0b47312..368daa727454f143fc5311de5126667c1f5bd8dc 100644 --- a/ppdet/modeling/architecture/mask_rcnn.py +++ b/ppdet/modeling/architecture/mask_rcnn.py @@ -133,15 +133,11 @@ class MaskRCNN(BaseArch): loss.update({'loss': total_loss}) return loss - def get_pred(self, return_numpy=True): - mask = self.mask_post_process(self.bboxes, self.mask_head_out, - self.inputs['im_shape'], - self.inputs['scale_factor']) + def get_pred(self): bbox, bbox_num = self.bboxes output = { - 'bbox': bbox.numpy(), - 'bbox_num': bbox_num.numpy(), - 'im_id': self.inputs['im_id'].numpy() + 'bbox': bbox, + 'bbox_num': bbox_num, + 'mask': self.mask_head_out } - output.update(mask) return output diff --git a/ppdet/modeling/architecture/meta_arch.py b/ppdet/modeling/architecture/meta_arch.py index c99bdb16e80824388d63c627395e336cccd78b3d..06e27bca93c278f8b18b6c60cdfde4a077bf3025 100644 --- a/ppdet/modeling/architecture/meta_arch.py +++ b/ppdet/modeling/architecture/meta_arch.py @@ -16,18 +16,24 @@ class BaseArch(nn.Layer): def __init__(self): super(BaseArch, self).__init__() - def forward(self, data, input_def, mode, 
input_tensor=None): + def forward(self, + input_tensor=None, + data=None, + input_def=None, + mode='infer'): if input_tensor is None: + assert data is not None and input_def is not None self.inputs = self.build_inputs(data, input_def) else: self.inputs = input_tensor + self.inputs['mode'] = mode self.model_arch() if mode == 'train': out = self.get_loss() elif mode == 'infer': - out = self.get_pred(input_tensor is None) + out = self.get_pred() else: out = None raise "Now, only support train and infer mode!" @@ -47,6 +53,3 @@ class BaseArch(nn.Layer): def get_pred(self, ): raise NotImplementedError("Should implement get_pred method!") - - def get_export_model(self, input_tensor): - return self.forward(None, None, 'infer', input_tensor) diff --git a/ppdet/modeling/architecture/yolo.py b/ppdet/modeling/architecture/yolo.py index 0f22c209eca1dd012f0372af4bdac172b17f787e..cbf09d47acb645d728521a96cd5490b42532590a 100644 --- a/ppdet/modeling/architecture/yolo.py +++ b/ppdet/modeling/architecture/yolo.py @@ -43,16 +43,12 @@ class YOLOv3(BaseArch): loss = self.yolo_head.get_loss(self.yolo_head_outs, self.inputs) return loss - def get_pred(self, return_numpy=True): + def get_pred(self): bbox, bbox_num = self.post_process( self.yolo_head_outs, self.yolo_head.mask_anchors, self.inputs['im_shape'], self.inputs['scale_factor']) - if return_numpy: - outs = { - "bbox": bbox.numpy(), - "bbox_num": bbox_num.numpy(), - 'im_id': self.inputs['im_id'].numpy() - } - else: - outs = [bbox, bbox_num] + outs = { + "bbox": bbox, + "bbox_num": bbox_num, + } return outs diff --git a/ppdet/modeling/head/mask_head.py b/ppdet/modeling/head/mask_head.py index 929e2c87f051e0d9b9630df53e48cb1b073c5ff4..4fc0984cc7ed241791f7dc039dc17f6ee61cff45 100644 --- a/ppdet/modeling/head/mask_head.py +++ b/ppdet/modeling/head/mask_head.py @@ -160,12 +160,19 @@ class MaskHead(Layer): bbox, bbox_num = bboxes if bbox.shape[0] == 0: - mask_head_out = bbox + mask_head_out = paddle.full([1, 6], -1) + return 
mask_head_out else: - scale_factor_list = [] + # TODO(guanghua): Remove fluid dependency + scale_factor_list = paddle.fluid.layers.create_array('float32') + num_count = 0 for idx, num in enumerate(bbox_num): for n in range(num): - scale_factor_list.append(scale_factor[idx, 0]) + paddle.fluid.layers.array_write( + x=scale_factor[idx, 0], + i=paddle.to_tensor(num_count), + array=scale_factor_list) + num_count += 1 scale_factor_list = paddle.cast( paddle.concat(scale_factor_list), 'float32') scale_factor_list = paddle.reshape(scale_factor_list, shape=[-1, 1]) @@ -182,7 +189,7 @@ class MaskHead(Layer): mode='infer') mask_logit = self.mask_fcn_logits[stage](mask_feat) mask_head_out = F.sigmoid(mask_logit) - return mask_head_out + return mask_head_out def forward(self, inputs, diff --git a/ppdet/modeling/head/roi_extractor.py b/ppdet/modeling/head/roi_extractor.py index 4a6d25423f2ad039f63a711c1a89c5b610895629..43121370ad3a8a2c1d813e5051c28f9f227eb0dc 100644 --- a/ppdet/modeling/head/roi_extractor.py +++ b/ppdet/modeling/head/roi_extractor.py @@ -36,7 +36,6 @@ class RoIAlign(object): def __call__(self, feats, rois, spatial_scale): roi, rois_num = rois - if self.start_level == self.end_level: rois_feat = ops.roi_align( feats[self.start_level], @@ -44,28 +43,28 @@ class RoIAlign(object): self.resolution, spatial_scale, rois_num=rois_num) - return rois_feat - offset = 2 - k_min = self.start_level + offset - k_max = self.end_level + offset - rois_dist, restore_index, rois_num_dist = ops.distribute_fpn_proposals( - roi, - k_min, - k_max, - self.canconical_level, - self.canonical_size, - rois_num=rois_num) - rois_feat_list = [] - for lvl in range(self.start_level, self.end_level + 1): - roi_feat = ops.roi_align( - feats[lvl], - rois_dist[lvl], - self.resolution, - spatial_scale[lvl], - sampling_ratio=self.sampling_ratio, - rois_num=rois_num_dist[lvl]) - rois_feat_list.append(roi_feat) - rois_feat_shuffle = paddle.concat(rois_feat_list) - rois_feat = 
paddle.gather(rois_feat_shuffle, restore_index) + else: + offset = 2 + k_min = self.start_level + offset + k_max = self.end_level + offset + rois_dist, restore_index, rois_num_dist = ops.distribute_fpn_proposals( + roi, + k_min, + k_max, + self.canconical_level, + self.canonical_size, + rois_num=rois_num) + rois_feat_list = [] + for lvl in range(self.start_level, self.end_level + 1): + roi_feat = ops.roi_align( + feats[lvl], + rois_dist[lvl], + self.resolution, + spatial_scale[lvl], + sampling_ratio=self.sampling_ratio, + rois_num=rois_num_dist[lvl]) + rois_feat_list.append(roi_feat) + rois_feat_shuffle = paddle.concat(rois_feat_list) + rois_feat = paddle.gather(rois_feat_shuffle, restore_index) return rois_feat diff --git a/ppdet/modeling/neck/fpn.py b/ppdet/modeling/neck/fpn.py index da9c63ace523c5f4cb79c64225f083af0de61724..318c83ca09d1077dccd002bb8ef8317dc3232e42 100644 --- a/ppdet/modeling/neck/fpn.py +++ b/ppdet/modeling/neck/fpn.py @@ -80,7 +80,8 @@ class FPN(Layer): for lvl in range(self.min_level, self.max_level): laterals.append(self.lateral_convs[lvl](body_feats[lvl])) - for lvl in range(self.max_level - 1, self.min_level, -1): + for i in range(self.min_level + 1, self.max_level): + lvl = self.max_level + self.min_level - i upsample = F.interpolate( laterals[lvl], scale_factor=2., diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index 604b4f7f4fb4d9bdfd22282f62497353b68e8aaa..3a6346cde347dffc64049053c11da474ffbade80 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -29,10 +29,19 @@ import numpy as np from functools import reduce __all__ = [ - 'roi_pool', 'roi_align', 'prior_box', 'anchor_generator', - 'generate_proposals', 'iou_similarity', 'box_coder', 'yolo_box', - 'multiclass_nms', 'distribute_fpn_proposals', 'collect_fpn_proposals', - 'matrix_nms', 'batch_norm' + 'roi_pool', + 'roi_align', + 'prior_box', + 'anchor_generator', + 'generate_proposals', + 'iou_similarity', + 'box_coder', + 'yolo_box', + 'multiclass_nms', + 
'distribute_fpn_proposals', + 'collect_fpn_proposals', + 'matrix_nms', + 'batch_norm', ] @@ -51,6 +60,7 @@ def batch_norm(ch, norm_type='bn', name=None): name=bn_name + '.offset', regularizer=L2Decay(0.))) +@paddle.jit.not_to_static def roi_pool(input, rois, output_size, @@ -123,32 +133,34 @@ def roi_pool(input, "pooled_width", pooled_width, "spatial_scale", spatial_scale) return pool_out, argmaxes - check_variable_and_dtype(input, 'input', ['float32'], 'roi_pool') - check_variable_and_dtype(rois, 'rois', ['float32'], 'roi_pool') - helper = LayerHelper('roi_pool', **locals()) - dtype = helper.input_dtype() - pool_out = helper.create_variable_for_type_inference(dtype) - argmaxes = helper.create_variable_for_type_inference(dtype='int32') - - inputs = { - "X": input, - "ROIs": rois, - } - if rois_num is not None: - inputs['RoisNum'] = rois_num - helper.append_op( - type="roi_pool", - inputs=inputs, - outputs={"Out": pool_out, - "Argmax": argmaxes}, - attrs={ - "pooled_height": pooled_height, - "pooled_width": pooled_width, - "spatial_scale": spatial_scale - }) - return pool_out, argmaxes + else: + check_variable_and_dtype(input, 'input', ['float32'], 'roi_pool') + check_variable_and_dtype(rois, 'rois', ['float32'], 'roi_pool') + helper = LayerHelper('roi_pool', **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + argmaxes = helper.create_variable_for_type_inference(dtype='int32') + + inputs = { + "X": input, + "ROIs": rois, + } + if rois_num is not None: + inputs['RoisNum'] = rois_num + helper.append_op( + type="roi_pool", + inputs=inputs, + outputs={"Out": pool_out, + "Argmax": argmaxes}, + attrs={ + "pooled_height": pooled_height, + "pooled_width": pooled_width, + "spatial_scale": spatial_scale + }) + return pool_out, argmaxes +@paddle.jit.not_to_static def roi_align(input, rois, output_size, @@ -228,31 +240,34 @@ def roi_align(input, "sampling_ratio", sampling_ratio) return align_out - 
check_variable_and_dtype(input, 'input', ['float32', 'float64'], - 'roi_align') - check_variable_and_dtype(rois, 'rois', ['float32', 'float64'], 'roi_align') - helper = LayerHelper('roi_align', **locals()) - dtype = helper.input_dtype() - align_out = helper.create_variable_for_type_inference(dtype) - inputs = { - "X": input, - "ROIs": rois, - } - if rois_num is not None: - inputs['RoisNum'] = rois_num - helper.append_op( - type="roi_align", - inputs=inputs, - outputs={"Out": align_out}, - attrs={ - "pooled_height": pooled_height, - "pooled_width": pooled_width, - "spatial_scale": spatial_scale, - "sampling_ratio": sampling_ratio - }) - return align_out + else: + check_variable_and_dtype(input, 'input', ['float32', 'float64'], + 'roi_align') + check_variable_and_dtype(rois, 'rois', ['float32', 'float64'], + 'roi_align') + helper = LayerHelper('roi_align', **locals()) + dtype = helper.input_dtype() + align_out = helper.create_variable_for_type_inference(dtype) + inputs = { + "X": input, + "ROIs": rois, + } + if rois_num is not None: + inputs['RoisNum'] = rois_num + helper.append_op( + type="roi_align", + inputs=inputs, + outputs={"Out": align_out}, + attrs={ + "pooled_height": pooled_height, + "pooled_width": pooled_width, + "spatial_scale": spatial_scale, + "sampling_ratio": sampling_ratio + }) + return align_out +@paddle.jit.not_to_static def iou_similarity(x, y, box_normalized=True, name=None): """ Computes intersection-over-union (IOU) between two box lists. 
@@ -303,19 +318,20 @@ def iou_similarity(x, y, box_normalized=True, name=None): if in_dygraph_mode(): out = core.ops.iou_similarity(x, y, 'box_normalized', box_normalized) return out + else: + helper = LayerHelper("iou_similarity", **locals()) + out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper = LayerHelper("iou_similarity", **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - helper.append_op( - type="iou_similarity", - inputs={"X": x, - "Y": y}, - attrs={"box_normalized": box_normalized}, - outputs={"Out": out}) - return out + helper.append_op( + type="iou_similarity", + inputs={"X": x, + "Y": y}, + attrs={"box_normalized": box_normalized}, + outputs={"Out": out}) + return out +@paddle.jit.not_to_static def collect_fpn_proposals(multi_rois, multi_scores, min_level, @@ -398,34 +414,35 @@ def collect_fpn_proposals(multi_rois, attrs = ('post_nms_topN', post_nms_top_n) output_rois, rois_num = core.ops.collect_fpn_proposals( input_rois, input_scores, rois_num_per_level, *attrs) + return output_rois, rois_num - helper = LayerHelper('collect_fpn_proposals', **locals()) - dtype = helper.input_dtype('multi_rois') - check_dtype(dtype, 'multi_rois', ['float32', 'float64'], - 'collect_fpn_proposals') - output_rois = helper.create_variable_for_type_inference(dtype) - output_rois.stop_gradient = True - - inputs = { - 'MultiLevelRois': input_rois, - 'MultiLevelScores': input_scores, - } - outputs = {'FpnRois': output_rois} - if rois_num_per_level is not None: - inputs['MultiLevelRoIsNum'] = rois_num_per_level - rois_num = helper.create_variable_for_type_inference(dtype='int32') - rois_num.stop_gradient = True - outputs['RoisNum'] = rois_num - helper.append_op( - type='collect_fpn_proposals', - inputs=inputs, - outputs=outputs, - attrs={'post_nms_topN': post_nms_top_n}) - if rois_num_per_level is not None: + else: + helper = LayerHelper('collect_fpn_proposals', **locals()) + dtype = helper.input_dtype('multi_rois') + 
check_dtype(dtype, 'multi_rois', ['float32', 'float64'], + 'collect_fpn_proposals') + output_rois = helper.create_variable_for_type_inference(dtype) + output_rois.stop_gradient = True + + inputs = { + 'MultiLevelRois': input_rois, + 'MultiLevelScores': input_scores, + } + outputs = {'FpnRois': output_rois} + if rois_num_per_level is not None: + inputs['MultiLevelRoIsNum'] = rois_num_per_level + rois_num = helper.create_variable_for_type_inference(dtype='int32') + rois_num.stop_gradient = True + outputs['RoisNum'] = rois_num + helper.append_op( + type='collect_fpn_proposals', + inputs=inputs, + outputs=outputs, + attrs={'post_nms_topN': post_nms_top_n}) return output_rois, rois_num - return output_rois +@paddle.jit.not_to_static def distribute_fpn_proposals(fpn_rois, min_level, max_level, @@ -510,45 +527,46 @@ def distribute_fpn_proposals(fpn_rois, fpn_rois, rois_num, num_lvl, num_lvl, *attrs) return multi_rois, restore_ind, rois_num_per_level - check_variable_and_dtype(fpn_rois, 'fpn_rois', ['float32', 'float64'], - 'distribute_fpn_proposals') - helper = LayerHelper('distribute_fpn_proposals', **locals()) - dtype = helper.input_dtype('fpn_rois') - multi_rois = [ - helper.create_variable_for_type_inference(dtype) for i in range(num_lvl) - ] - - restore_ind = helper.create_variable_for_type_inference(dtype='int32') - - inputs = {'FpnRois': fpn_rois} - outputs = { - 'MultiFpnRois': multi_rois, - 'RestoreIndex': restore_ind, - } - - if rois_num is not None: - inputs['RoisNum'] = rois_num - rois_num_per_level = [ - helper.create_variable_for_type_inference(dtype='int32') + else: + check_variable_and_dtype(fpn_rois, 'fpn_rois', ['float32', 'float64'], + 'distribute_fpn_proposals') + helper = LayerHelper('distribute_fpn_proposals', **locals()) + dtype = helper.input_dtype('fpn_rois') + multi_rois = [ + helper.create_variable_for_type_inference(dtype) for i in range(num_lvl) ] - outputs['MultiLevelRoIsNum'] = rois_num_per_level - - helper.append_op( - 
type='distribute_fpn_proposals', - inputs=inputs, - outputs=outputs, - attrs={ - 'min_level': min_level, - 'max_level': max_level, - 'refer_level': refer_level, - 'refer_scale': refer_scale - }) - if rois_num is not None: + + restore_ind = helper.create_variable_for_type_inference(dtype='int32') + + inputs = {'FpnRois': fpn_rois} + outputs = { + 'MultiFpnRois': multi_rois, + 'RestoreIndex': restore_ind, + } + + if rois_num is not None: + inputs['RoisNum'] = rois_num + rois_num_per_level = [ + helper.create_variable_for_type_inference(dtype='int32') + for i in range(num_lvl) + ] + outputs['MultiLevelRoIsNum'] = rois_num_per_level + + helper.append_op( + type='distribute_fpn_proposals', + inputs=inputs, + outputs=outputs, + attrs={ + 'min_level': min_level, + 'max_level': max_level, + 'refer_level': refer_level, + 'refer_scale': refer_scale + }) return multi_rois, restore_ind, rois_num_per_level - return multi_rois, restore_ind +@paddle.jit.not_to_static def yolo_box( x, origin_shape, @@ -685,6 +703,7 @@ def yolo_box( return boxes, scores +@paddle.jit.not_to_static def prior_box(input, image, min_sizes, @@ -798,36 +817,37 @@ def prior_box(input, attrs = tuple(attrs) box, var = core.ops.prior_box(input, image, *attrs) return box, var + else: + attrs = { + 'min_sizes': min_sizes, + 'aspect_ratios': aspect_ratios, + 'variances': variance, + 'flip': flip, + 'clip': clip, + 'step_w': steps[0], + 'step_h': steps[1], + 'offset': offset, + 'min_max_aspect_ratios_order': min_max_aspect_ratios_order + } + + if cur_max_sizes is not None: + attrs['max_sizes'] = cur_max_sizes - attrs = { - 'min_sizes': min_sizes, - 'aspect_ratios': aspect_ratios, - 'variances': variance, - 'flip': flip, - 'clip': clip, - 'step_w': steps[0], - 'step_h': steps[1], - 'offset': offset, - 'min_max_aspect_ratios_order': min_max_aspect_ratios_order - } - - if cur_max_sizes is not None: - attrs['max_sizes'] = cur_max_sizes - - box = helper.create_variable_for_type_inference(dtype) - var = 
helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="prior_box", - inputs={"Input": input, - "Image": image}, - outputs={"Boxes": box, - "Variances": var}, - attrs=attrs, ) - box.stop_gradient = True - var.stop_gradient = True - return box, var + box = helper.create_variable_for_type_inference(dtype) + var = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type="prior_box", + inputs={"Input": input, + "Image": image}, + outputs={"Boxes": box, + "Variances": var}, + attrs=attrs, ) + box.stop_gradient = True + var.stop_gradient = True + return box, var +@paddle.jit.not_to_static def anchor_generator(input, anchor_sizes=None, aspect_ratios=None, @@ -916,27 +936,29 @@ def anchor_generator(input, anchor, var = core.ops.anchor_generator(input, *attrs) return anchor, var - attrs = { - 'anchor_sizes': anchor_sizes, - 'aspect_ratios': aspect_ratios, - 'variances': variance, - 'stride': stride, - 'offset': offset - } - - anchor = helper.create_variable_for_type_inference(dtype) - var = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="anchor_generator", - inputs={"Input": input}, - outputs={"Anchors": anchor, - "Variances": var}, - attrs=attrs, ) - anchor.stop_gradient = True - var.stop_gradient = True - return anchor, var + else: + attrs = { + 'anchor_sizes': anchor_sizes, + 'aspect_ratios': aspect_ratios, + 'variances': variance, + 'stride': stride, + 'offset': offset + } + + anchor = helper.create_variable_for_type_inference(dtype) + var = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type="anchor_generator", + inputs={"Input": input}, + outputs={"Anchors": anchor, + "Variances": var}, + attrs=attrs, ) + anchor.stop_gradient = True + var.stop_gradient = True + return anchor, var +@paddle.jit.not_to_static def multiclass_nms(bboxes, scores, score_threshold, @@ -1091,6 +1113,7 @@ def multiclass_nms(bboxes, return output, nms_rois_num, index +@paddle.jit.not_to_static def 
matrix_nms(bboxes, scores, score_threshold, @@ -1196,41 +1219,42 @@ def matrix_nms(bboxes, if return_rois_num: return out, rois_num return out + else: + helper = LayerHelper('matrix_nms', **locals()) + output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) + index = helper.create_variable_for_type_inference(dtype='int') + outputs = {'Out': output, 'Index': index} + if return_rois_num: + rois_num = helper.create_variable_for_type_inference(dtype='int') + outputs['RoisNum'] = rois_num - helper = LayerHelper('matrix_nms', **locals()) - output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) - index = helper.create_variable_for_type_inference(dtype='int') - outputs = {'Out': output, 'Index': index} - if return_rois_num: - rois_num = helper.create_variable_for_type_inference(dtype='int') - outputs['RoisNum'] = rois_num - - helper.append_op( - type="matrix_nms", - inputs={'BBoxes': bboxes, - 'Scores': scores}, - attrs={ - 'background_label': background_label, - 'score_threshold': score_threshold, - 'post_threshold': post_threshold, - 'nms_top_k': nms_top_k, - 'gaussian_sigma': gaussian_sigma, - 'use_gaussian': use_gaussian, - 'keep_top_k': keep_top_k, - 'normalized': normalized - }, - outputs=outputs) - output.stop_gradient = True - - if return_index: + helper.append_op( + type="matrix_nms", + inputs={'BBoxes': bboxes, + 'Scores': scores}, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'post_threshold': post_threshold, + 'nms_top_k': nms_top_k, + 'gaussian_sigma': gaussian_sigma, + 'use_gaussian': use_gaussian, + 'keep_top_k': keep_top_k, + 'normalized': normalized + }, + outputs=outputs) + output.stop_gradient = True + + if return_index: + if return_rois_num: + return output, index, rois_num + return output, index if return_rois_num: - return output, index, rois_num - return output, index - if return_rois_num: - return output, rois_num - return output + return output, rois_num + return output 
+@paddle.jit.not_to_static def box_coder(prior_box, prior_box_var, target_box, @@ -1357,32 +1381,34 @@ def box_coder(prior_box, raise TypeError( "Input variance of box_coder must be Variable or list") return output_box - - helper = LayerHelper("box_coder", **locals()) - - output_box = helper.create_variable_for_type_inference( - dtype=prior_box.dtype) - - inputs = {"PriorBox": prior_box, "TargetBox": target_box} - attrs = { - "code_type": code_type, - "box_normalized": box_normalized, - "axis": axis - } - if isinstance(prior_box_var, Variable): - inputs['PriorBoxVar'] = prior_box_var - elif isinstance(prior_box_var, list): - attrs['variance'] = prior_box_var else: - raise TypeError("Input variance of box_coder must be Variable or list") - helper.append_op( - type="box_coder", - inputs=inputs, - attrs=attrs, - outputs={"OutputBox": output_box}) - return output_box + helper = LayerHelper("box_coder", **locals()) + output_box = helper.create_variable_for_type_inference( + dtype=prior_box.dtype) + inputs = {"PriorBox": prior_box, "TargetBox": target_box} + attrs = { + "code_type": code_type, + "box_normalized": box_normalized, + "axis": axis + } + if isinstance(prior_box_var, Variable): + inputs['PriorBoxVar'] = prior_box_var + elif isinstance(prior_box_var, list): + attrs['variance'] = prior_box_var + else: + raise TypeError( + "Input variance of box_coder must be Variable or list") + helper.append_op( + type="box_coder", + inputs=inputs, + attrs=attrs, + outputs={"OutputBox": output_box}) + return output_box + + +@paddle.jit.not_to_static def generate_proposals(scores, bbox_deltas, im_shape, @@ -1472,56 +1498,55 @@ def generate_proposals(scores, scores, bbox_deltas, im_shape, anchors, variances, *attrs) return rpn_rois, rpn_roi_probs, rpn_rois_num - helper = LayerHelper('generate_proposals_v2', **locals()) - - check_variable_and_dtype(scores, 'scores', ['float32'], - 'generate_proposals_v2') - check_variable_and_dtype(bbox_deltas, 'bbox_deltas', ['float32'], - 
'generate_proposals_v2') - check_variable_and_dtype(im_shape, 'im_shape', ['float32', 'float64'], - 'generate_proposals_v2') - check_variable_and_dtype(anchors, 'anchors', ['float32'], - 'generate_proposals_v2') - check_variable_and_dtype(variances, 'variances', ['float32'], - 'generate_proposals_v2') - - rpn_rois = helper.create_variable_for_type_inference( - dtype=bbox_deltas.dtype) - rpn_roi_probs = helper.create_variable_for_type_inference( - dtype=scores.dtype) - outputs = { - 'RpnRois': rpn_rois, - 'RpnRoiProbs': rpn_roi_probs, - } - if return_rois_num: - rpn_rois_num = helper.create_variable_for_type_inference(dtype='int32') - rpn_rois_num.stop_gradient = True - outputs['RpnRoisNum'] = rpn_rois_num - - helper.append_op( - type="generate_proposals_v2", - inputs={ - 'Scores': scores, - 'BboxDeltas': bbox_deltas, - 'ImShape': im_shape, - 'Anchors': anchors, - 'Variances': variances - }, - attrs={ - 'pre_nms_topN': pre_nms_top_n, - 'post_nms_topN': post_nms_top_n, - 'nms_thresh': nms_thresh, - 'min_size': min_size, - 'eta': eta - }, - outputs=outputs) - rpn_rois.stop_gradient = True - rpn_roi_probs.stop_gradient = True - - if return_rois_num: - return rpn_rois, rpn_roi_probs, rpn_rois_num else: - return rpn_rois, rpn_roi_probs + helper = LayerHelper('generate_proposals_v2', **locals()) + + check_variable_and_dtype(scores, 'scores', ['float32'], + 'generate_proposals_v2') + check_variable_and_dtype(bbox_deltas, 'bbox_deltas', ['float32'], + 'generate_proposals_v2') + check_variable_and_dtype(im_shape, 'im_shape', ['float32', 'float64'], + 'generate_proposals_v2') + check_variable_and_dtype(anchors, 'anchors', ['float32'], + 'generate_proposals_v2') + check_variable_and_dtype(variances, 'variances', ['float32'], + 'generate_proposals_v2') + + rpn_rois = helper.create_variable_for_type_inference( + dtype=bbox_deltas.dtype) + rpn_roi_probs = helper.create_variable_for_type_inference( + dtype=scores.dtype) + outputs = { + 'RpnRois': rpn_rois, + 'RpnRoiProbs': 
rpn_roi_probs, + } + if return_rois_num: + rpn_rois_num = helper.create_variable_for_type_inference( + dtype='int32') + rpn_rois_num.stop_gradient = True + outputs['RpnRoisNum'] = rpn_rois_num + + helper.append_op( + type="generate_proposals_v2", + inputs={ + 'Scores': scores, + 'BboxDeltas': bbox_deltas, + 'ImShape': im_shape, + 'Anchors': anchors, + 'Variances': variances + }, + attrs={ + 'pre_nms_topN': pre_nms_top_n, + 'post_nms_topN': post_nms_top_n, + 'nms_thresh': nms_thresh, + 'min_size': min_size, + 'eta': eta + }, + outputs=outputs) + rpn_rois.stop_gradient = True + rpn_roi_probs.stop_gradient = True + + return rpn_rois, rpn_roi_probs, rpn_rois_num def sigmoid_cross_entropy_with_logits(input, diff --git a/ppdet/py_op/post_process.py b/ppdet/py_op/post_process.py index 58cfcd2c18d231bf2d6d8bcb8ec1b30721305fd8..dd35c45e90a2d4a0d3d77817635d4659002facef 100755 --- a/ppdet/py_op/post_process.py +++ b/ppdet/py_op/post_process.py @@ -73,7 +73,8 @@ def bbox_post_process(bboxes, @jit -def mask_post_process(bboxes, +def mask_post_process(bbox, + bbox_nums, masks, im_shape, scale_factor, @@ -81,7 +82,6 @@ def mask_post_process(bboxes, binary_thresh=0.5): if masks.shape[0] == 0: return masks - bbox, bbox_nums = bboxes M = resolution scale = (M + 2.0) / M boxes = bbox[:, 2:] @@ -98,7 +98,6 @@ def mask_post_process(bboxes, boxes_n = boxes[st_num:end_num] labels_n = labels[st_num:end_num] masks_n = masks[st_num:end_num] - im_h = int(round(im_shape[i][0] / scale_factor[i])) im_w = int(round(im_shape[i][1] / scale_factor[i])) boxes_n = expand_bbox(boxes_n, scale) diff --git a/ppdet/utils/eval_utils.py b/ppdet/utils/eval_utils.py index f6eba837d3d9f2bdb6236c8db72df85d22f5cd99..79f5e104de2202417803546a7452088a30d92ae5 100644 --- a/ppdet/utils/eval_utils.py +++ b/ppdet/utils/eval_utils.py @@ -5,7 +5,7 @@ from __future__ import print_function import os import sys import json -from ppdet.py_op.post_process import get_det_res, get_seg_res +from ppdet.py_op.post_process import 
get_det_res, get_seg_res, mask_post_process import logging logger = logging.getLogger(__name__) @@ -33,7 +33,8 @@ def json_eval_results(metric, json_directory=None, dataset=None): logger.info("{} not exists!".format(v_json)) -def get_infer_results(outs_res, eval_type, catid): +def get_infer_results(outs_res, eval_type, catid, im_info, + mask_resolution=None): """ Get result at the stage of inference. The output format is dictionary containing bbox or mask result. @@ -49,16 +50,25 @@ def get_infer_results(outs_res, eval_type, catid): if 'bbox' in eval_type: box_res = [] - for outs in outs_res: - box_res += get_det_res(outs['bbox'], outs['bbox_num'], - outs['im_id'], catid) + for i, outs in enumerate(outs_res): + im_ids = im_info[i][2] + box_res += get_det_res(outs['bbox'].numpy(), + outs['bbox_num'].numpy(), im_ids, catid) infer_res['bbox'] = box_res if 'mask' in eval_type: seg_res = [] - for outs in outs_res: - seg_res += get_seg_res(outs['mask'], outs['bbox_num'], - outs['im_id'], catid) + # mask post process + for i, outs in enumerate(outs_res): + im_shape = im_info[i][0] + scale_factor = im_info[i][1] + im_ids = im_info[i][2] + mask = mask_post_process(outs['bbox'].numpy(), + outs['bbox_num'].numpy(), + outs['mask'].numpy(), im_shape, + scale_factor[0], mask_resolution) + seg_res += get_seg_res(mask, outs['bbox_num'].numpy(), im_ids, + catid) infer_res['mask'] = seg_res return infer_res diff --git a/tools/eval.py b/tools/eval.py index b5d874df22626c999ec1bec296002fd41d0c23f9..14d3953684b57c46dbcbd6246270e886281b8015 100755 --- a/tools/eval.py +++ b/tools/eval.py @@ -75,12 +75,18 @@ def run(FLAGS, cfg, place): outs_res = [] start_time = time.time() sample_num = 0 + im_info = [] for iter_id, data in enumerate(eval_loader): # forward + fields = cfg['EvalReader']['inputs_def']['fields'] model.eval() - outs = model(data, cfg['EvalReader']['inputs_def']['fields'], 'infer') + outs = model(data=data, input_def=fields, mode='infer') outs_res.append(outs) - + 
im_info.append([ + data[fields.index('im_shape')].numpy(), + data[fields.index('scale_factor')].numpy(), + data[fields.index('im_id')].numpy() + ]) # log sample_num += len(data) if iter_id % 100 == 0: @@ -102,7 +108,15 @@ def run(FLAGS, cfg, place): clsid2catid, catid2name = get_category_info(anno_file, with_background, use_default_label) - infer_res = get_infer_results(outs_res, eval_type, clsid2catid) + mask_resolution = None + if cfg['MaskPostProcess']['mask_resolution'] is not None: + mask_resolution = int(cfg['MaskPostProcess']['mask_resolution']) + infer_res = get_infer_results( + outs_res, + eval_type, + clsid2catid, + im_info, + mask_resolution=mask_resolution) eval_results(infer_res, cfg.metric, anno_file) diff --git a/tools/export_model.py b/tools/export_model.py index 888bb3e3742446847d00e0838b4e74cd7dfae0e6..ad7434b82e46a090fe1cdc869f203e06a066ee84 100644 --- a/tools/export_model.py +++ b/tools/export_model.py @@ -53,63 +53,43 @@ def parse_args(): return args +def dygraph_to_static(model, save_dir, cfg): + if not os.path.exists(save_dir): + os.makedirs(save_dir) + inputs_def = cfg['TestReader']['inputs_def'] + image_shape = inputs_def.get('image_shape') + if image_shape is None: + image_shape = [3, None, None] + # Save infer cfg + dump_infer_config(cfg, os.path.join(save_dir, 'infer_cfg.yml'), image_shape) + + input_spec = [{ + "image": InputSpec( + shape=[None] + image_shape, name='image'), + "im_shape": InputSpec( + shape=[None, 2], name='im_shape'), + "scale_factor": InputSpec( + shape=[None, 2], name='scale_factor') + }] + + export_model = to_static(model, input_spec=input_spec) + # save Model + paddle.jit.save(export_model, os.path.join(save_dir, 'model')) + + def run(FLAGS, cfg): # Model main_arch = cfg.architecture model = create(cfg.architecture) - inputs_def = cfg['TestReader']['inputs_def'] - assert 'image_shape' in inputs_def, 'image_shape must be specified.' 
- image_shape = inputs_def.get('image_shape') - - assert not None in image_shape, 'image_shape should not contain None' cfg_name = os.path.basename(FLAGS.config).split('.')[0] save_dir = os.path.join(FLAGS.output_dir, cfg_name) - if not os.path.exists(save_dir): - os.makedirs(save_dir) - - image_shape = dump_infer_config(cfg, - os.path.join(save_dir, 'infer_cfg.yml'), - image_shape) - - class ExportModel(nn.Layer): - def __init__(self, model): - super(ExportModel, self).__init__() - self.model = model - - @to_static(input_spec=[ - { - 'image': InputSpec( - shape=[None] + image_shape, name='image') - }, - { - 'im_shape': InputSpec( - shape=[None, 2], name='im_shape') - }, - { - 'scale_factor': InputSpec( - shape=[None, 2], name='scale_factor') - }, - ]) - def forward(self, image, im_shape, scale_factor): - inputs = {} - inputs_tensor = [image, im_shape, scale_factor] - for t in inputs_tensor: - inputs.update(t) - outs = self.model.get_export_model(inputs) - return outs - - export_model = ExportModel(model) - # debug for dy2static, remove later - #paddle.jit.set_code_level() # Init Model - load_weight(export_model.model, cfg.weights) - - export_model.eval() + load_weight(model, cfg.weights) # export config and model - paddle.jit.save(export_model, os.path.join(save_dir, 'model')) + dygraph_to_static(model, save_dir, cfg) logger.info('Export model to {}'.format(save_dir)) diff --git a/tools/export_utils.py b/tools/export_utils.py index 6a50ddfd7941c841063b3965da3e8345855f60b0..2430774872607c6448221475cfb8d9cacd960a60 100644 --- a/tools/export_utils.py +++ b/tools/export_utils.py @@ -109,7 +109,8 @@ def dump_infer_config(config, path, image_shape): os._exit(0) if 'Mask' in config['architecture']: - infer_cfg['mask_resolution'] = config['Mask']['mask_resolution'] + infer_cfg['mask_resolution'] = config['MaskPostProcess'][ + 'mask_resolution'] infer_cfg['with_background'], infer_cfg['Preprocess'], infer_cfg[ 'label_list'], image_shape = parse_reader( 
config['TestReader'], config['TestDataset'], config['metric'], diff --git a/tools/infer.py b/tools/infer.py index 69a8ab5576c35b9bfc61f318be541bbfbfdf497e..10e2ea0eb71e924e993cf99cd69811c64622d4cd 100755 --- a/tools/infer.py +++ b/tools/infer.py @@ -147,15 +147,32 @@ def run(FLAGS, cfg, place): # Run Infer for iter_id, data in enumerate(test_loader): # forward + fields = cfg.TestReader['inputs_def']['fields'] model.eval() - outs = model(data, cfg.TestReader['inputs_def']['fields'], 'infer') - - batch_res = get_infer_results([outs], outs.keys(), clsid2catid) + outs = model( + data=data, + input_def=cfg.TestReader['inputs_def']['fields'], + mode='infer') + im_info = [[ + data[fields.index('im_shape')].numpy(), + data[fields.index('scale_factor')].numpy(), + data[fields.index('im_id')].numpy() + ]] + im_ids = data[fields.index('im_id')].numpy() + + mask_resolution = None + if cfg['MaskPostProcess']['mask_resolution'] is not None: + mask_resolution = int(cfg['MaskPostProcess']['mask_resolution']) + batch_res = get_infer_results( + [outs], + outs.keys(), + clsid2catid, + im_info, + mask_resolution=mask_resolution) logger.info('Infer iter {}'.format(iter_id)) bbox_res = None mask_res = None - im_ids = outs['im_id'] bbox_num = outs['bbox_num'] start = 0 for i, im_id in enumerate(im_ids): diff --git a/tools/train.py b/tools/train.py index 0cc3d249dec6b7c407f66dd5a48137614e70fa51..121bb1d576ab39db91d9616ce715b57fe2ef37f6 100755 --- a/tools/train.py +++ b/tools/train.py @@ -35,6 +35,7 @@ from ppdet.utils.stats import TrainingStats from ppdet.utils.check import check_gpu, check_version, check_config from ppdet.utils.cli import ArgsParser from ppdet.utils.checkpoint import load_weight, load_pretrain_weight, save_model +from export_model import dygraph_to_static from paddle.distributed import ParallelEnv import logging FORMAT = '%(asctime)s-%(levelname)s: %(message)s' @@ -149,6 +150,8 @@ def run(FLAGS, cfg, place): model = paddle.DataParallel(model) fields = 
train_loader.collate_fn.output_fields + cfg_name = os.path.basename(FLAGS.config).split('.')[0] + save_dir = os.path.join(cfg.save_dir, cfg_name) # Run Train time_stat = deque(maxlen=cfg.log_iter) start_time = time.time() @@ -167,7 +170,7 @@ def run(FLAGS, cfg, place): # Model Forward model.train() - outputs = model(data, fields, 'train') + outputs = model(data=data, input_def=fields, mode='train') # Model Backward loss = outputs['loss'] @@ -193,11 +196,12 @@ def run(FLAGS, cfg, place): if ParallelEnv().local_rank == 0 and ( cur_eid % cfg.snapshot_epoch == 0 or (cur_eid + 1) == int(cfg.epoch)): - cfg_name = os.path.basename(FLAGS.config).split('.')[0] save_name = str(cur_eid) if cur_eid + 1 != int( cfg.epoch) else "model_final" - save_dir = os.path.join(cfg.save_dir, cfg_name) save_model(model, optimizer, save_dir, save_name, cur_eid + 1) + # TODO(guanghua): dygraph model to static model + # if ParallelEnv().local_rank == 0 and (cur_eid + 1) == int(cfg.epoch): + # dygraph_to_static(model, os.path.join(save_dir, 'static_model_final'), cfg) def main():