Commit 2a5fd326 authored by qingqing01, committed by GitHub

Fix ResizeImage and data transform for SSD (#2950)

* Fix ResizeImage and data transform for SSD
* Fix code style
Parent 3a2a8ff2
@@ -10,11 +10,11 @@ log_smooth_window: 1
 metric: VOC
 save_dir: output
 weights: output/ssd_mobilenet_v1_voc/model_final/
+num_classes: 21
 
 SSD:
   backbone: MobileNet
   multi_box_head: MultiBoxHead
-  num_classes: 21
   metric:
     ap_version: 11point
     evaluate_difficult: false
......
@@ -87,7 +87,7 @@ results of image size 608/416/320 above.
 
 | Backbone     | Size | Image/gpu | Lr schd | Box AP | Download |
 | :----------- | :--: | :-------: | :-----: | :----: | :------: |
-| MobileNet v1 | 300  | 32        | 120e    | 73.2   | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) |
+| MobileNet v1 | 300  | 32        | 120e    | 73.13  | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) |
 
 **NOTE**: SSD is trained on 2 GPUs with a total batch size of 64 for 120 epochs. SSD training data augmentations: random color distortion,
 random cropping, random expansion, random flipping.
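For context on the NOTE above: the random color distortion / expansion / cropping / flipping steps belong to the training pipeline only. Below is a minimal sketch of what an SSD training sample-transform list could look like, mirroring the SSDEvalFeed pipeline edited in the next hunk; the module path and every argument value here are assumptions for illustration, not taken from this commit.

```python
# Illustrative SSD training transforms (a sketch only; module path and
# argument values are assumptions). The random-augmentation steps are left
# as a comment because their operators are not part of this diff.
from ppdet.data.transform.operators import (
    DecodeImage, NormalizeBox, ResizeImage, RandomFlipImage, Permute,
    NormalizeImage)

train_sample_transforms = [
    DecodeImage(to_rgb=True, with_mixup=False),   # image bytes -> RGB ndarray
    NormalizeBox(),                               # gt boxes to [0, 1] coords
    # ... random color distortion, random expansion, random cropping ...
    ResizeImage(target_size=300, use_cv2=False, interp=1),
    RandomFlipImage(is_normalized=True),          # train-time only (see below)
    Permute(),                                    # HWC -> CHW
    NormalizeImage(mean=[127.5, 127.5, 127.5]),   # remaining args elided
]
```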
@@ -744,7 +744,6 @@ class SSDEvalFeed(DataFeed):
                 DecodeImage(to_rgb=True, with_mixup=False),
                 NormalizeBox(),
                 ResizeImage(target_size=300, use_cv2=False, interp=1),
-                RandomFlipImage(is_normalized=True),
                 Permute(),
                 NormalizeImage(
                     mean=[127.5, 127.5, 127.5],
......
@@ -76,6 +76,7 @@ class RoiDbSource(Dataset):
         self._mixup_epoch = mixup_epoch
         self._with_background = with_background
         self.cname2cid = cname2cid
+        self._imid2path = None
 
     def __str__(self):
         return 'RoiDbSource(fname:%s,epoch:%d,size:%d,pos:%d)' \
@@ -156,3 +157,14 @@ class RoiDbSource(Dataset):
         """ return epoch id for latest sample
         """
         return self._epoch
+
+    def get_imid2path(self):
+        """return image id to image path map"""
+        if self._imid2path is None:
+            self._imid2path = {}
+            for record in self._roidb:
+                im_id = record['im_id']
+                im_id = im_id if isinstance(im_id, int) else im_id[0]
+                im_path = os.path.join(self._image_dir, record['im_file'])
+                self._imid2path[im_id] = im_path
+        return self._imid2path
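The new get_imid2path helper lazily builds an {im_id: image path} map from the roidb records. A hypothetical usage sketch follows; the function name and the result-dict layout it expects are illustrative, and only the get_imid2path call itself comes from this commit.

```python
def dump_image_paths(source, results):
    """Print which image file each predicted im_id refers to.

    `source` is assumed to be an already constructed RoiDbSource and
    `results` a list of dicts carrying an 'im_id' field (both assumptions).
    """
    imid2path = source.get_imid2path()   # {im_id: path under _image_dir}
    for res in results:
        im_id = int(res['im_id'])
        print('im_id %d -> %s' % (im_id, imid2path[im_id]))
```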
@@ -126,6 +126,11 @@ class ResizeImage(BaseOperator):
                  interp=cv2.INTER_LINEAR,
                  use_cv2=True):
         """
+        Rescale the image to the specified target size, capping it at
+        max_size when max_size != 0.
+        If target_size is a list, a size is randomly selected as the
+        target size.
+
         Args:
             target_size (int|list): the target size of image's short side,
                 multi-scale training is adopted when type is list.
...@@ -172,15 +177,19 @@ class ResizeImage(BaseOperator): ...@@ -172,15 +177,19 @@ class ResizeImage(BaseOperator):
im_scale = float(self.max_size) / float(im_size_max) im_scale = float(self.max_size) / float(im_size_max)
im_scale_x = im_scale im_scale_x = im_scale
im_scale_y = im_scale im_scale_y = im_scale
resize_w = np.round(im_scale_x * float(im_shape[1]))
resize_h = np.round(im_scale_y * float(im_shape[0]))
sample['im_info'] = np.array( sample['im_info'] = np.array(
[ [resize_h, resize_w, im_scale], dtype=np.float32)
np.round(im_shape[0] * im_scale),
np.round(im_shape[1] * im_scale), im_scale
],
dtype=np.float32)
else: else:
im_scale_x = float(selected_size) / float(im_shape[1]) im_scale_x = float(selected_size) / float(im_shape[1])
im_scale_y = float(selected_size) / float(im_shape[0]) im_scale_y = float(selected_size) / float(im_shape[0])
resize_w = selected_size
resize_h = selected_size
if self.use_cv2: if self.use_cv2:
im = cv2.resize( im = cv2.resize(
im, im,
...@@ -191,8 +200,6 @@ class ResizeImage(BaseOperator): ...@@ -191,8 +200,6 @@ class ResizeImage(BaseOperator):
interpolation=self.interp) interpolation=self.interp)
else: else:
im = Image.fromarray(im) im = Image.fromarray(im)
resize_w = selected_size * im_scale_x
resize_h = selected_size * im_scale_y
im = im.resize((resize_w, resize_h), self.interp) im = im.resize((resize_w, resize_h), self.interp)
im = np.array(im) im = np.array(im)
......
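The two branches edited above now compute resize_w/resize_h explicitly: when max_size != 0 the short side is scaled to the target size with the long side capped at max_size (aspect ratio preserved, and im_info stores [resize_h, resize_w, im_scale]), while the SSD-style branch resizes both sides to the selected size. A self-contained numpy sketch of that scale computation, kept separate from the operator class (function and variable names are illustrative):

```python
import numpy as np

def compute_resize(im_shape, target_size, max_size=0):
    """Mirror the resize-scale logic above (illustrative only).

    im_shape is (height, width); returns (resize_h, resize_w, scale).
    """
    h, w = float(im_shape[0]), float(im_shape[1])
    if max_size != 0:
        # Scale the short side to target_size, but cap the long side at
        # max_size so the aspect ratio is preserved.
        im_size_min, im_size_max = min(h, w), max(h, w)
        im_scale = float(target_size) / im_size_min
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / im_size_max
        return np.round(im_scale * h), np.round(im_scale * w), im_scale
    # SSD-style branch: both sides become target_size, so x/y scales differ.
    im_scale_x = float(target_size) / w
    im_scale_y = float(target_size) / h
    return float(target_size), float(target_size), (im_scale_x, im_scale_y)

print(compute_resize((480, 640), 300))        # (300.0, 300.0, (0.46875, 0.625))
print(compute_resize((480, 640), 800, 1333))  # (800.0, 1067.0, 1.666...)
```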
@@ -39,6 +39,7 @@ class SSD(object):
 
     __category__ = 'architecture'
     __inject__ = ['backbone', 'multi_box_head', 'output_decoder', 'metric']
+    __shared__ = ['num_classes']
 
     def __init__(self,
                  backbone,
@@ -100,4 +101,3 @@ class SSD(object):
         # SSD uses output_decoder in output layers, bbox is normalized
         # to range [0, 1], is_bbox_normalized is used in infer.py
         return True
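The new `__shared__ = ['num_classes']` is what allows num_classes to move to the top level of the YAML in the first hunk of this diff: a key declared as shared is filled in from the global config for every module that wants it, instead of being repeated under each section. A minimal sketch of that propagation pattern, illustrating the idea rather than PaddleDetection's actual config registry:

```python
# Minimal sketch of "__shared__" key propagation (illustrative only; the
# real PaddleDetection config registry is more involved).
def fill_shared(module_cls, module_cfg, global_cfg):
    """Copy globally defined keys into a module config when the module
    declares them in __shared__ and does not override them locally."""
    for key in getattr(module_cls, '__shared__', []):
        if key not in module_cfg and key in global_cfg:
            module_cfg[key] = global_cfg[key]
    return module_cfg


class SSD(object):
    __shared__ = ['num_classes']


global_cfg = {'num_classes': 21}            # the new top-level YAML key
ssd_cfg = fill_shared(SSD, {'backbone': 'MobileNet'}, global_cfg)
print(ssd_cfg)   # {'backbone': 'MobileNet', 'num_classes': 21}
```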