Commit 2a5fd326 authored by qingqing01, committed by GitHub

Fix ResizeImage and data transform for SSD (#2950)

* Fix ResizeImage and data transform for SSD
* Fix code style
Parent 3a2a8ff2
@@ -10,11 +10,11 @@ log_smooth_window: 1
metric: VOC
save_dir: output
weights: output/ssd_mobilenet_v1_voc/model_final/
num_classes: 21
SSD:
backbone: MobileNet
multi_box_head: MultiBoxHead
num_classes: 21
metric:
ap_version: 11point
evaluate_difficult: false
@@ -87,7 +87,7 @@ results of image size 608/416/320 above.
| Backbone | Size | Image/gpu | Lr schd | Box AP | Download |
| :----------- | :--: | :-----: | :-----: | :----: | :-------: |
| MobileNet v1 | 300 | 32 | 120e | 73.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) |
| MobileNet v1 | 300 | 32 | 120e | 73.13 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) |
**NOTE**: SSD is trained on 2 GPUs with a total batch size of 64 for 120 epochs. SSD training data augmentations: random color distortion,
random cropping, random expansion, random flipping.
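For orientation, a hedged sketch of what a training sample-transform list matching the augmentations above could look like. `DecodeImage`, `NormalizeBox`, `ResizeImage`, `RandomFlipImage`, `Permute`, and `NormalizeImage` are taken from the eval feed changed below; `RandomDistort`, `ExpandImage`, and `CropImage` and all argument choices are assumptions standing in for the color-distortion, expansion, and cropping steps, not an excerpt from the real ssd_mobilenet_v1_voc training feed.

```python
# Hedged sketch only: RandomDistort, ExpandImage, CropImage and every
# argument value here are assumptions, not the actual training configuration.
sample_transforms = [
    DecodeImage(to_rgb=True, with_mixup=False),   # load image, BGR -> RGB
    NormalizeBox(),                               # gt boxes to [0, 1] coordinates
    RandomDistort(),                              # random color distortion
    ExpandImage(),                                # random expansion
    CropImage(),                                  # random cropping
    ResizeImage(target_size=300, use_cv2=False, interp=1),
    RandomFlipImage(is_normalized=True),          # random horizontal flip
    Permute(),                                    # HWC -> CHW
    NormalizeImage(mean=[127.5, 127.5, 127.5]),
]
```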
@@ -744,7 +744,6 @@ class SSDEvalFeed(DataFeed):
                     DecodeImage(to_rgb=True, with_mixup=False),
                     NormalizeBox(),
                     ResizeImage(target_size=300, use_cv2=False, interp=1),
                     RandomFlipImage(is_normalized=True),
                     Permute(),
                     NormalizeImage(
                         mean=[127.5, 127.5, 127.5],
@@ -76,6 +76,7 @@ class RoiDbSource(Dataset):
        self._mixup_epoch = mixup_epoch
        self._with_background = with_background
        self.cname2cid = cname2cid
        self._imid2path = None

    def __str__(self):
        return 'RoiDbSource(fname:%s,epoch:%d,size:%d,pos:%d)' \
@@ -156,3 +157,14 @@ class RoiDbSource(Dataset):
""" return epoch id for latest sample
"""
return self._epoch
def get_imid2path(self):
"""return image id to image path map"""
if self._imid2path is None:
self._imid2path = {}
for record in self._roidb:
im_id = record['im_id']
im_id = im_id if isinstance(im_id, int) else im_id[0]
im_path = os.path.join(self._image_dir, record['im_file'])
self._imid2path[im_id] = im_path
return self._imid2path
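A small, self-contained illustration of the mapping this method builds (plain Python, mirroring the loop above). The record fields come from the method body, while the example ids, file names, and `image_dir` path are hypothetical.

```python
import os

# 'records' stands in for self._roidb; field names follow get_imid2path().
records = [
    {'im_id': 1, 'im_file': '000005.jpg'},
    {'im_id': [2], 'im_file': '000007.jpg'},  # im_id may arrive wrapped in a list/array
]
image_dir = 'dataset/voc/JPEGImages'  # hypothetical image directory

imid2path = {}
for record in records:
    im_id = record['im_id']
    im_id = im_id if isinstance(im_id, int) else im_id[0]
    imid2path[im_id] = os.path.join(image_dir, record['im_file'])

print(imid2path[2])  # dataset/voc/JPEGImages/000007.jpg
```

Such a map can, for example, let a predicted `im_id` be traced back to the original image file when saving or visualizing inference results.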
@@ -126,6 +126,11 @@ class ResizeImage(BaseOperator):
                 interp=cv2.INTER_LINEAR,
                 use_cv2=True):
        """
        Rescale the image to the specified target size, capped at max_size
        if max_size != 0.
        If target_size is a list, a scale is randomly selected from it as
        the target size.
        Args:
            target_size (int|list): the target size of image's short side,
                multi-scale training is adopted when type is list.
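Two hedged construction examples for the modes the docstring describes. The single-scale call is the one used by the SSD eval feed in this commit; the multi-scale call, the size list, and the import path are illustrative assumptions.

```python
# Import path assumed from this repository's layout.
from ppdet.data.transform.operators import ResizeImage

# Single scale: warp every image to 300 x 300 (max_size defaults to 0).
ssd_resize = ResizeImage(target_size=300, use_cv2=False, interp=1)

# Multi-scale (assumed values): one size is picked at random per sample,
# and with max_size != 0 the longer side is capped while the aspect ratio
# is preserved.
ms_resize = ResizeImage(target_size=[416, 512, 608], max_size=1333, use_cv2=True)
```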
@@ -172,15 +177,19 @@
                im_scale = float(self.max_size) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale

            resize_w = np.round(im_scale_x * float(im_shape[1]))
            resize_h = np.round(im_scale_y * float(im_shape[0]))

            sample['im_info'] = np.array(
                [
                    np.round(im_shape[0] * im_scale),
                    np.round(im_shape[1] * im_scale), im_scale
                ],
                dtype=np.float32)
                [resize_h, resize_w, im_scale], dtype=np.float32)
        else:
            im_scale_x = float(selected_size) / float(im_shape[1])
            im_scale_y = float(selected_size) / float(im_shape[0])

            resize_w = selected_size
            resize_h = selected_size

        if self.use_cv2:
            im = cv2.resize(
                im,
@@ -191,8 +200,6 @@
                interpolation=self.interp)
        else:
            im = Image.fromarray(im)
            resize_w = selected_size * im_scale_x
            resize_h = selected_size * im_scale_y
            im = im.resize((resize_w, resize_h), self.interp)
            im = np.array(im)
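To make the effect of the change concrete, here is a plain-Python check of the non-cv2 (PIL) branch shown above; it is illustrative arithmetic, not code from ppdet.

```python
# max_size == 0 branch of ResizeImage, as fixed in the hunk above.
def ssd_resize_shape(im_h, im_w, selected_size):
    im_scale_x = float(selected_size) / float(im_w)
    im_scale_y = float(selected_size) / float(im_h)
    resize_w = selected_size  # fixed: previously selected_size * im_scale_x
    resize_h = selected_size  # fixed: previously selected_size * im_scale_y
    return resize_w, resize_h, im_scale_x, im_scale_y

# A 500 x 375 (w x h) VOC image with target_size=300:
# the old PIL branch resized to (300 * 0.6, 300 * 0.8) = (180, 240), applying
# the scale twice; after the fix the image is warped to (300, 300) as intended.
print(ssd_resize_shape(375, 500, 300))  # (300, 300, 0.6, 0.8)
```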
@@ -39,6 +39,7 @@ class SSD(object):
    __category__ = 'architecture'
    __inject__ = ['backbone', 'multi_box_head', 'output_decoder', 'metric']
    __shared__ = ['num_classes']

    def __init__(self,
                 backbone,
@@ -100,4 +101,3 @@ class SSD(object):
        # SSD uses output_decoder in its output layers; bbox is normalized
        # to the range [0, 1]. is_bbox_normalized is used in infer.py
        return True
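The new `__shared__ = ['num_classes']` entry is what allows `num_classes: 21` to live at the top level of the YAML config (see the first hunk) instead of being repeated under `SSD:`. Below is a minimal self-contained sketch of the idea, assuming the usual pattern that keys listed in `__shared__` are filled from the global config; it is not the real ppdet config machinery.

```python
# Toy illustration of a __shared__-style config key; not ppdet's implementation.
GLOBAL_CFG = {'num_classes': 21}

class MultiBoxHeadStub:
    __shared__ = ['num_classes']

    def __init__(self, num_classes=81):
        self.num_classes = num_classes

def build(module_cls, **kwargs):
    # Inject shared keys from the global config unless explicitly overridden.
    for key in getattr(module_cls, '__shared__', []):
        kwargs.setdefault(key, GLOBAL_CFG[key])
    return module_cls(**kwargs)

head = build(MultiBoxHeadStub)
print(head.num_classes)  # 21 -- taken once from the top-level config entry
```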