未验证 提交 ad353419 编写于 作者: wangguanzhong 提交者: GitHub

[Dygraph]fix dygraph (#1883)

* fix dygraph

* refine keep_ratio in ResizeOp
上级 7257a364
......@@ -95,6 +95,7 @@ BBoxPostProcess:
name: RCNNBox
num_classes: 81
batch_size: 1
var_weight: 3.
nms:
name: MultiClassNMS
keep_top_k: 100
......
......@@ -92,6 +92,7 @@ BBoxPostProcess:
name: RCNNBox
num_classes: 81
batch_size: 1
var_weight: 3.
nms:
name: MultiClassNMS
keep_top_k: 100
......
......@@ -21,7 +21,7 @@ EvalReader:
sample_transforms:
- DecodeOp: { }
- NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True }
- PermuteOp: { }
batch_transforms:
- PadBatchOp: { pad_to_stride: 32, pad_gt: false }
......@@ -37,7 +37,7 @@ TestReader:
sample_transforms:
- DecodeOp: { }
- NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True }
- PermuteOp: { }
batch_transforms:
- PadBatchOp: { pad_to_stride: 32, pad_gt: false }
......
......@@ -21,7 +21,7 @@ EvalReader:
sample_transforms:
- DecodeOp: { }
- NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True }
- PermuteOp: { }
batch_transforms:
- PadBatchOp: { pad_to_stride: -1, pad_gt: false }
......@@ -37,7 +37,7 @@ TestReader:
sample_transforms:
- DecodeOp: { }
- NormalizeImageOp: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ] }
- ResizeOp: { interp: 1, target_size: [ 800, 1333 ], keep_ratio: True }
- PermuteOp: { }
batch_transforms:
- PadBatchOp: { pad_to_stride: -1, pad_gt: false }
......
......@@ -21,7 +21,7 @@ EvalReader:
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
......@@ -37,7 +37,7 @@ TestReader:
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
......
......@@ -21,7 +21,7 @@ EvalReader:
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
......@@ -37,7 +37,7 @@ TestReader:
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
......
......@@ -123,7 +123,7 @@ class Detector(object):
boxes_tensor = self.predictor.get_output_handle(output_names[0])
np_boxes = boxes_tensor.copy_to_cpu()
if self.pred_config.mask_resolution is not None:
masks_tensor = self.predictor.get_output_handle(output_names[1])
masks_tensor = self.predictor.get_output_handle(output_names[2])
np_masks = masks_tensor.copy_to_cpu()
t1 = time.time()
......
......@@ -192,8 +192,8 @@ class PadStride(object):
im_info (dict): info of processed image
"""
coarsest_stride = self.coarsest_stride
if coarsest_stride == 0:
return im
if coarsest_stride <= 0:
return im, im_info
im_c, im_h, im_w = im.shape
pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
......
......@@ -158,7 +158,7 @@ class BatchRandomResizeOp(BaseOperator):
def __init__(self,
target_size,
keep_ratio=True,
keep_ratio,
interp=cv2.INTER_NEAREST,
random_size=True,
random_interp=False):
......
......@@ -577,7 +577,7 @@ class RandomFlipOp(BaseOperator):
@register_op
class ResizeOp(BaseOperator):
def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
"""
Resize image to target size. if keep_ratio is True,
resize the image's long side to the maximum of target_size
......
......@@ -113,12 +113,9 @@ class CascadeRCNN(BaseArch):
if self.inputs['mode'] == 'infer':
bbox_pred, bboxes = self.bbox_head.get_cascade_prediction(
self.bbox_head_list, rois_list)
self.bboxes = self.bbox_post_process(
bbox_pred,
bboxes,
self.bboxes = self.bbox_post_process(bbox_pred, bboxes,
self.inputs['im_shape'],
self.inputs['scale_factor'],
var_weight=3.)
self.inputs['scale_factor'])
if self.with_mask:
rois = rois_list[-1]
......
......@@ -161,18 +161,14 @@ class MaskHead(Layer):
if bbox.shape[0] == 0:
mask_head_out = paddle.full([1, 6], -1)
return mask_head_out
else:
# TODO(guanghua): Remove fluid dependency
scale_factor_list = paddle.fluid.layers.create_array('float32')
num_count = 0
for idx, num in enumerate(bbox_num):
for n in range(num):
paddle.fluid.layers.array_write(
x=scale_factor[idx, 0],
i=paddle.to_tensor(num_count),
array=scale_factor_list)
num_count += 1
scale_factor_list = []
for idx in range(bbox_num.shape[0]):
num = bbox_num[idx]
scale = scale_factor[idx, 0]
ones = paddle.ones(num)
scale_expand = ones * scale
scale_factor_list.append(scale_expand)
scale_factor_list = paddle.cast(
paddle.concat(scale_factor_list), 'float32')
scale_factor_list = paddle.reshape(scale_factor_list, shape=[-1, 1])
......
......@@ -263,7 +263,8 @@ class RCNNBox(object):
prior_box_var=[0.1, 0.1, 0.2, 0.2],
code_type="decode_center_size",
box_normalized=False,
axis=1):
axis=1,
var_weight=1.):
super(RCNNBox, self).__init__()
self.num_classes = num_classes
self.batch_size = batch_size
......@@ -271,13 +272,9 @@ class RCNNBox(object):
self.code_type = code_type
self.box_normalized = box_normalized
self.axis = axis
self.var_weight = var_weight
def __call__(self,
bbox_head_out,
rois,
im_shape,
scale_factor,
var_weight=1.):
def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
bbox_pred, cls_prob = bbox_head_out
roi, rois_num = rois
origin_shape = im_shape / scale_factor
......@@ -296,7 +293,7 @@ class RCNNBox(object):
origin_shape = paddle.concat(origin_shape_list)
bbox = roi / scale
prior_box_var = [i / var_weight for i in self.prior_box_var]
prior_box_var = [i / self.var_weight for i in self.prior_box_var]
bbox = ops.box_coder(
prior_box=bbox,
prior_box_var=prior_box_var,
......
......@@ -16,14 +16,8 @@ class BBoxPostProcess(object):
self.decode = decode
self.nms = nms
def __call__(self,
head_out,
rois,
im_shape,
scale_factor=None,
var_weight=1.):
bboxes, score = self.decode(head_out, rois, im_shape, scale_factor,
var_weight)
def __call__(self, head_out, rois, im_shape, scale_factor=None):
bboxes, score = self.decode(head_out, rois, im_shape, scale_factor)
bbox_pred, bbox_num, _ = self.nms(bboxes, score)
return bbox_pred, bbox_num
......
......@@ -73,33 +73,34 @@ def bbox_post_process(bboxes,
@jit
def mask_post_process(bbox,
bbox_nums,
masks,
def mask_post_process(det_res,
im_shape,
scale_factor,
resolution=14,
binary_thresh=0.5):
bbox = det_res['bbox']
bbox_num = det_res['bbox_num']
masks = det_res['mask']
if masks.shape[0] == 0:
return masks
M = resolution
scale = (M + 2.0) / M
boxes = bbox[:, 2:]
labels = bbox[:, 0]
segms_results = [[] for _ in range(len(bbox_nums))]
segms_results = [[] for _ in range(len(bbox_num))]
sum = 0
st_num = 0
end_num = 0
for i in range(len(bbox_nums)):
bbox_num = bbox_nums[i]
end_num += bbox_num
for i in range(len(bbox_num)):
length = bbox_num[i]
end_num += length
cls_segms = []
boxes_n = boxes[st_num:end_num]
labels_n = labels[st_num:end_num]
masks_n = masks[st_num:end_num]
im_h = int(round(im_shape[i][0] / scale_factor[i]))
im_w = int(round(im_shape[i][1] / scale_factor[i]))
im_h = int(round(im_shape[i][0] / scale_factor[i, 0]))
im_w = int(round(im_shape[i][1] / scale_factor[i, 0]))
boxes_n = expand_bbox(boxes_n, scale)
boxes_n = boxes_n.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
......@@ -129,8 +130,8 @@ def mask_post_process(bbox,
im_mask[:, :, np.newaxis], order='F'))[0]
cls_segms.append(rle)
segms_results[i] = np.array(cls_segms)[:, np.newaxis]
st_num += bbox_num
segms_results = np.vstack([segms_results[k] for k in range(len(bbox_nums))])
st_num += length
segms_results = np.vstack([segms_results[k] for k in range(len(bbox_num))])
bboxes = np.hstack([segms_results, bbox])
return bboxes[:, :3]
......
......@@ -5,7 +5,7 @@ from __future__ import print_function
import os
import sys
import json
from ppdet.py_op.post_process import get_det_res, get_seg_res, mask_post_process
from ppdet.py_op.post_process import get_det_res, get_seg_res
import logging
logger = logging.getLogger(__name__)
......@@ -33,8 +33,7 @@ def json_eval_results(metric, json_directory=None, dataset=None):
logger.info("{} not exists!".format(v_json))
def get_infer_results(outs_res, eval_type, catid, im_info,
mask_resolution=None):
def get_infer_results(outs_res, eval_type, catid, im_info):
"""
Get result at the stage of inference.
The output format is dictionary containing bbox or mask result.
......@@ -52,8 +51,8 @@ def get_infer_results(outs_res, eval_type, catid, im_info,
box_res = []
for i, outs in enumerate(outs_res):
im_ids = im_info[i][2]
box_res += get_det_res(outs['bbox'].numpy(),
outs['bbox_num'].numpy(), im_ids, catid)
box_res += get_det_res(outs['bbox'], outs['bbox_num'], im_ids,
catid)
infer_res['bbox'] = box_res
if 'mask' in eval_type:
......@@ -63,12 +62,8 @@ def get_infer_results(outs_res, eval_type, catid, im_info,
im_shape = im_info[i][0]
scale_factor = im_info[i][1]
im_ids = im_info[i][2]
mask = mask_post_process(outs['bbox'].numpy(),
outs['bbox_num'].numpy(),
outs['mask'].numpy(), im_shape,
scale_factor[0], mask_resolution)
seg_res += get_seg_res(mask, outs['bbox_num'].numpy(), im_ids,
catid)
mask = outs['mask']
seg_res += get_seg_res(mask, outs['bbox_num'], im_ids, catid)
infer_res['mask'] = seg_res
return infer_res
......
......@@ -81,14 +81,22 @@ def run(FLAGS, cfg, place):
fields = cfg['EvalReader']['inputs_def']['fields']
model.eval()
outs = model(data=data, input_def=fields, mode='infer')
for key, value in outs.items():
outs[key] = value.numpy()
im_shape = data[fields.index('im_shape')].numpy()
scale_factor = data[fields.index('scale_factor')].numpy()
im_id = data[fields.index('im_id')].numpy()
im_info.append([im_shape, scale_factor, im_id])
if 'mask' in outs and 'bbox' in outs:
mask_resolution = model.mask_post_process.mask_resolution
from ppdet.py_op.post_process import mask_post_process
outs['mask'] = mask_post_process(outs, im_shape, scale_factor,
mask_resolution)
outs_res.append(outs)
im_info.append([
data[fields.index('im_shape')].numpy(),
data[fields.index('scale_factor')].numpy(),
data[fields.index('im_id')].numpy()
])
# log
sample_num += len(data)
sample_num += im_shape.shape[0]
if iter_id % 100 == 0:
logger.info("Eval iter: {}".format(iter_id))
......@@ -96,8 +104,10 @@ def run(FLAGS, cfg, place):
logger.info('Total sample number: {}, averge FPS: {}'.format(
sample_num, sample_num / cost_time))
eval_type = ['bbox']
if getattr(cfg, 'MaskHead', None):
eval_type = []
if 'bbox' in outs:
eval_type.append('bbox')
if 'mask' in outs:
eval_type.append('mask')
# Metric
# TODO: support other metric
......@@ -108,16 +118,7 @@ def run(FLAGS, cfg, place):
clsid2catid, catid2name = get_category_info(anno_file, with_background,
use_default_label)
mask_resolution = None
if 'Mask' in cfg.architecture and cfg['MaskPostProcess'][
'mask_resolution'] is not None:
mask_resolution = int(cfg['MaskPostProcess']['mask_resolution'])
infer_res = get_infer_results(
outs_res,
eval_type,
clsid2catid,
im_info,
mask_resolution=mask_resolution)
infer_res = get_infer_results(outs_res, eval_type, clsid2catid, im_info)
eval_results(infer_res, cfg.metric, anno_file)
......
......@@ -61,7 +61,9 @@ def dygraph_to_static(model, save_dir, cfg):
if image_shape is None:
image_shape = [3, None, None]
# Save infer cfg
dump_infer_config(cfg, os.path.join(save_dir, 'infer_cfg.yml'), image_shape)
dump_infer_config(cfg,
os.path.join(save_dir, 'infer_cfg.yml'), image_shape,
model)
input_spec = [{
"image": InputSpec(
......
......@@ -64,9 +64,11 @@ def parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
for key, value in st.items():
p = {'type': key}
if key == 'ResizeOp':
if value.get('keep_ratio', False):
if value.get('keep_ratio',
False) and image_shape[1] is not None:
max_size = max(image_shape[1:])
image_shape = [3, max_size, max_size]
value['target_size'] = image_shape[1:]
p.update(value)
preprocess_list.append(p)
batch_transforms = reader_cfg.get('batch_transforms', None)
......@@ -74,7 +76,7 @@ def parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
methods = [list(bt.keys())[0] for bt in batch_transforms]
for bt in batch_transforms:
for key, value in bt.items():
if key == 'PadBatch':
if key == 'PadBatchOp':
preprocess_list.append({'type': 'PadStride'})
preprocess_list[-1].update({
'stride': value['pad_to_stride']
......@@ -84,7 +86,7 @@ def parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
return with_background, preprocess_list, label_list, image_shape
def dump_infer_config(config, path, image_shape):
def dump_infer_config(config, path, image_shape, model):
arch_state = False
from ppdet.core.config.yaml_helpers import setup_orderdict
setup_orderdict()
......@@ -107,10 +109,8 @@ def dump_infer_config(config, path, image_shape):
'Architecture: {} is not supported for exporting model now'.format(
infer_arch))
os._exit(0)
if 'Mask' in config['architecture']:
infer_cfg['mask_resolution'] = config['MaskPostProcess'][
'mask_resolution']
if 'mask_post_process' in model.__dict__:
infer_cfg['mask_resolution'] = model.mask_post_process.mask_resolution
infer_cfg['with_background'], infer_cfg['Preprocess'], infer_cfg[
'label_list'], image_shape = parse_reader(
config['TestReader'], config['TestDataset'], config['metric'],
......
......@@ -153,23 +153,26 @@ def run(FLAGS, cfg, place):
data=data,
input_def=cfg.TestReader['inputs_def']['fields'],
mode='infer')
im_info = [[
data[fields.index('im_shape')].numpy(),
data[fields.index('scale_factor')].numpy(),
data[fields.index('im_id')].numpy()
]]
for key, value in outs.items():
outs[key] = value.numpy()
im_shape = data[fields.index('im_shape')].numpy()
scale_factor = data[fields.index('scale_factor')].numpy()
im_ids = data[fields.index('im_id')].numpy()
im_info = [im_shape, scale_factor, im_ids]
mask_resolution = None
if 'Mask' in cfg.architecture and cfg['MaskPostProcess'][
'mask_resolution'] is not None:
mask_resolution = int(cfg['MaskPostProcess']['mask_resolution'])
batch_res = get_infer_results(
[outs],
outs.keys(),
clsid2catid,
im_info,
mask_resolution=mask_resolution)
if 'mask' in outs and 'bbox' in outs:
mask_resolution = model.mask_post_process.mask_resolution
from ppdet.py_op.post_process import mask_post_process
outs['mask'] = mask_post_process(outs, im_shape, scale_factor,
mask_resolution)
eval_type = []
if 'bbox' in outs:
eval_type.append('bbox')
if 'mask' in outs:
eval_type.append('mask')
batch_res = get_infer_results([outs], eval_type, clsid2catid, [im_info])
logger.info('Infer iter {}'.format(iter_id))
bbox_res = None
mask_res = None
......@@ -177,7 +180,6 @@ def run(FLAGS, cfg, place):
bbox_num = outs['bbox_num']
start = 0
for i, im_id in enumerate(im_ids):
im_id = im_ids[i]
image_path = imid2path[int(im_id)]
image = Image.open(image_path).convert('RGB')
end = start + bbox_num[i]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册