Unverified commit 0e45394c, authored by Kaipeng Deng, committed by GitHub

Merge pull request #2072 from heavengate/fix_yolo_param_pick

[cherry-pick] refine yolov3 param name
@@ -142,6 +142,8 @@ Evaluation results are shown below:
 | 416x416 | 36.5 | 58.2 | 39.1 |
 | 320x320 | 34.1 | 55.4 | 36.3 |
 
+- **NOTE:** Evaluation is based on the `pycocotools` evaluator, and predicted bounding boxes with `score < 0.05` are not filtered out. Frameworks that filter out predicted boxes with `score < 0.05` will show a drop in accuracy.
+
 ## Inference and Visualization
 
 Inference is used to get prediction scores or image features from trained models. `infer.py` is the main executor for inference; one can start the infer step by:
......
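The note added above is worth unpacking: `pycocotools`' COCOeval averages precision over up to 100 detections per image, so keeping low-score boxes adds tail detections that raise recall and mAP. A minimal evaluation sketch, assuming hypothetical annotation and detection-result file names (the repo's eval flow writes COCO-format JSON results):

```python
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Hypothetical file names for illustration only.
coco_gt = COCO('annotations/instances_val2017.json')
coco_dt = coco_gt.loadRes('bbox_detections.json')

coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()  # prints the AP metrics reported in the table above
```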
@@ -142,6 +142,8 @@ Train Loss
 | 416x416 | 36.5 | 58.2 | 39.1 |
 | 320x320 | 34.1 | 55.4 | 36.3 |
 
+- **NOTE:** Evaluation is based on the `pycocotools` evaluator, without filtering out predicted boxes with `score < 0.05`; frameworks that apply this filtering will show a drop in accuracy.
+
 ## Inference and Visualization
......
@@ -51,7 +51,14 @@ def random_distort(img):
     return img
 
-def random_crop(img, boxes, labels, scores, scales=[0.3, 1.0], max_ratio=2.0, constraints=None, max_trial=50):
+def random_crop(img,
+                boxes,
+                labels,
+                scores,
+                scales=[0.3, 1.0],
+                max_ratio=2.0,
+                constraints=None,
+                max_trial=50):
     if len(boxes) == 0:
         return img, boxes
@@ -90,10 +97,12 @@ def random_crop(img, boxes, labels, scores, scales=[0.3, 1.0], max_ratio=2.0, co
     while crops:
         crop = crops.pop(np.random.randint(0, len(crops)))
-        crop_boxes, crop_labels, crop_scores, box_num = box_utils.box_crop(boxes, labels, scores, crop, (w, h))
+        crop_boxes, crop_labels, crop_scores, box_num = \
+            box_utils.box_crop(boxes, labels, scores, crop, (w, h))
         if box_num < 1:
             continue
-        img = img.crop((crop[0], crop[1], crop[0] + crop[2], crop[1] + crop[3])).resize(img.size, Image.LANCZOS)
+        img = img.crop((crop[0], crop[1], crop[0] + crop[2],
+                        crop[1] + crop[3])).resize(img.size, Image.LANCZOS)
         img = np.asarray(img)
         return img, crop_boxes, crop_labels, crop_scores
 
     img = np.asarray(img)
@@ -118,10 +127,16 @@ def random_interp(img, size, interp=None):
     h, w, _ = img.shape
     im_scale_x = size / float(w)
     im_scale_y = size / float(h)
-    img = cv2.resize(img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=interp)
+    img = cv2.resize(img, None, None, fx=im_scale_x, fy=im_scale_y,
+                     interpolation=interp)
     return img
 
-def random_expand(img, gtboxes, max_ratio=4., fill=None, keep_ratio=True, thresh=0.5):
+def random_expand(img,
+                  gtboxes,
+                  max_ratio=4.,
+                  fill=None,
+                  keep_ratio=True,
+                  thresh=0.5):
     if random.random() > thresh:
         return img, gtboxes
@@ -153,13 +168,21 @@ def random_expand(img, gtboxes, max_ratio=4., fill=None, keep_ratio=True, thresh
     return out_img.astype('uint8'), gtboxes
 
 def shuffle_gtbox(gtbox, gtlabel, gtscore):
-    gt = np.concatenate([gtbox, gtlabel[:, np.newaxis], gtscore[:, np.newaxis]], axis=1)
+    gt = np.concatenate([gtbox, gtlabel[:, np.newaxis],
+                         gtscore[:, np.newaxis]], axis=1)
     idx = np.arange(gt.shape[0])
     np.random.shuffle(idx)
     gt = gt[idx, :]
     return gt[:, :4], gt[:, 4], gt[:, 5]
 
-def image_mixup(img1, gtboxes1, gtlabels1, gtscores1, img2, gtboxes2, gtlabels2, gtscores2):
+def image_mixup(img1,
+                gtboxes1,
+                gtlabels1,
+                gtscores1,
+                img2,
+                gtboxes2,
+                gtlabels2,
+                gtscores2):
     factor = np.random.beta(1.5, 1.5)
     factor = max(0.0, min(1.0, factor))
     if factor >= 1.0:
@@ -173,7 +196,8 @@ def image_mixup(img1, gtboxes1, gtlabels1, gtscores1, img2, gtboxes2, gtlabels2,
     w = max(img1.shape[1], img2.shape[1])
     img = np.zeros((h, w, img1.shape[2]), 'float32')
     img[:img1.shape[0], :img1.shape[1], :] = img1.astype('float32') * factor
-    img[:img2.shape[0], :img2.shape[1], :] += img2.astype('float32') * (1.0 - factor)
+    img[:img2.shape[0], :img2.shape[1], :] += \
+        img2.astype('float32') * (1.0 - factor)
     gtboxes = np.zeros_like(gtboxes1)
     gtlabels = np.zeros_like(gtlabels1)
     gtscores = np.zeros_like(gtscores1)
@@ -208,7 +232,8 @@ def image_mixup(img1, gtboxes1, gtlabels1, gtscores1, img2, gtboxes2, gtlabels2,
 def image_augment(img, gtboxes, gtlabels, gtscores, size, means=None):
     img = random_distort(img)
     img, gtboxes = random_expand(img, gtboxes, fill=means)
-    img, gtboxes, gtlabels, gtscores = random_crop(img, gtboxes, gtlabels, gtscores)
+    img, gtboxes, gtlabels, gtscores = \
+        random_crop(img, gtboxes, gtlabels, gtscores)
     img = random_interp(img, size)
     img, gtboxes = random_flip(img, gtboxes)
     gtboxes, gtlabels, gtscores = shuffle_gtbox(gtboxes, gtlabels, gtscores)
......
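For reference, the blend implemented by `image_mixup` above reduces to the following standalone form. A minimal sketch with random stand-in images (the repo blends real samples and also merges both images' ground-truth boxes, which is elided in this hunk):

```python
import numpy as np

# factor ~ Beta(1.5, 1.5), clipped to [0, 1], exactly as in image_mixup.
factor = max(0.0, min(1.0, np.random.beta(1.5, 1.5)))

# Stand-in images of different sizes; the canvas takes the larger extent.
img1 = np.random.randint(0, 256, (416, 416, 3)).astype('float32')
img2 = np.random.randint(0, 256, (320, 320, 3)).astype('float32')

h = max(img1.shape[0], img2.shape[0])
w = max(img1.shape[1], img2.shape[1])
img = np.zeros((h, w, 3), 'float32')
img[:img1.shape[0], :img1.shape[1], :] = img1 * factor
img[:img2.shape[0], :img2.shape[1], :] += img2 * (1.0 - factor)
```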
@@ -55,7 +55,13 @@ def conv_bn_layer(input,
     out = fluid.layers.leaky_relu(x=out, alpha=0.1)
     return out
 
-def downsample(input, ch_out, filter_size=3, stride=2, padding=1, is_test=True, name=None):
+def downsample(input,
+               ch_out,
+               filter_size=3,
+               stride=2,
+               padding=1,
+               is_test=True,
+               name=None):
     return conv_bn_layer(input,
                          ch_out=ch_out,
                          filter_size=filter_size,
@@ -65,15 +71,19 @@ def downsample(input, ch_out, filter_size=3, stride=2, padding=1, is_test=True,
                          name=name)
 
 def basicblock(input, ch_out, is_test=True, name=None):
-    conv1 = conv_bn_layer(input, ch_out, 1, 1, 0, is_test=is_test, name=name+".0")
-    conv2 = conv_bn_layer(conv1, ch_out*2, 3, 1, 1, is_test=is_test, name=name+".1")
+    conv1 = conv_bn_layer(input, ch_out, 1, 1, 0,
+                          is_test=is_test, name=name+".0")
+    conv2 = conv_bn_layer(conv1, ch_out*2, 3, 1, 1,
+                          is_test=is_test, name=name+".1")
     out = fluid.layers.elementwise_add(x=input, y=conv2, act=None)
     return out
 
 def layer_warp(block_func, input, ch_out, count, is_test=True, name=None):
-    res_out = block_func(input, ch_out, is_test=is_test, name='{}.0'.format(name))
+    res_out = block_func(input, ch_out, is_test=is_test,
+                         name='{}.0'.format(name))
     for j in range(1, count):
-        res_out = block_func(res_out, ch_out, is_test=is_test, name='{}.{}'.format(name, j))
+        res_out = block_func(res_out, ch_out, is_test=is_test,
+                             name='{}.{}'.format(name, j))
     return res_out
 
 DarkNet_cfg = {
@@ -83,14 +93,21 @@ DarkNet_cfg = {
 def add_DarkNet53_conv_body(body_input, is_test=True):
     stages, block_func = DarkNet_cfg[53]
     stages = stages[0:5]
-    conv1 = conv_bn_layer(
-        body_input, ch_out=32, filter_size=3, stride=1, padding=1, is_test=is_test, name="yolo_input")
-    downsample_ = downsample(conv1, ch_out=conv1.shape[1]*2, is_test=is_test, name="yolo_input.downsample")
+    conv1 = conv_bn_layer(body_input, ch_out=32, filter_size=3,
+                          stride=1, padding=1, is_test=is_test,
+                          name="yolo_input")
+    downsample_ = downsample(conv1, ch_out=conv1.shape[1]*2,
+                             is_test=is_test,
+                             name="yolo_input.downsample")
     blocks = []
     for i, stage in enumerate(stages):
-        block = layer_warp(block_func, downsample_, 32 *(2**i), stage, is_test=is_test, name="stage.{}".format(i))
+        block = layer_warp(block_func, downsample_, 32 *(2**i),
+                           stage, is_test=is_test,
+                           name="stage.{}".format(i))
         blocks.append(block)
         if i < len(stages) - 1:  # do not downsample in the last stage
-            downsample_ = downsample(block, ch_out=block.shape[1]*2, is_test=is_test, name="stage.{}.downsample".format(i))
+            downsample_ = downsample(block, ch_out=block.shape[1]*2,
+                                     is_test=is_test,
+                                     name="stage.{}.downsample".format(i))
     return blocks[-1:-4:-1]
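A rough sketch of the stage layout built by `add_DarkNet53_conv_body`, assuming the elided `DarkNet_cfg[53]` stage counts are `[1, 2, 8, 8, 4]` (the standard DarkNet-53 configuration, not shown in this hunk). Every stage except the last is followed by a stride-2 downsample, so `blocks[-1:-4:-1]` returns the three deepest feature maps, deepest first, for the multi-scale detection heads:

```python
# Trace the spatial resolution through the body for a 608x608 input.
stages = [1, 2, 8, 8, 4]   # assumed DarkNet_cfg[53] block counts
size = 608                 # yolo_input conv is stride 1, keeps resolution
size //= 2                 # yolo_input.downsample halves it first
for i, count in enumerate(stages):
    print("stage.%d: %d basicblocks at %dx%d" % (i, count, size, size))
    if i < len(stages) - 1:
        size //= 2         # per-stage downsample, skipped after the last stage
```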
@@ -27,13 +27,22 @@ from .darknet import add_DarkNet53_conv_body
 from .darknet import conv_bn_layer
 
 def yolo_detection_block(input, channel, is_test=True, name=None):
-    assert channel % 2 == 0, "channel {} cannot be divided by 2".format(channel)
+    assert channel % 2 == 0, \
+        "channel {} cannot be divided by 2".format(channel)
     conv = input
     for j in range(2):
-        conv = conv_bn_layer(conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.{}.0'.format(name, j))
-        conv = conv_bn_layer(conv, channel*2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.{}.1'.format(name, j))
-    route = conv_bn_layer(conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.2'.format(name))
-    tip = conv_bn_layer(route, channel*2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.tip'.format(name))
+        conv = conv_bn_layer(conv, channel, filter_size=1,
+                             stride=1, padding=0, is_test=is_test,
+                             name='{}.{}.0'.format(name, j))
+        conv = conv_bn_layer(conv, channel*2, filter_size=3,
+                             stride=1, padding=1, is_test=is_test,
+                             name='{}.{}.1'.format(name, j))
+    route = conv_bn_layer(conv, channel, filter_size=1, stride=1,
+                          padding=0, is_test=is_test,
+                          name='{}.2'.format(name))
+    tip = conv_bn_layer(route, channel*2, filter_size=3, stride=1,
+                        padding=1, is_test=is_test,
+                        name='{}.tip'.format(name))
     return route, tip
 
 def upsample(input, scale=2, name=None):
@@ -68,11 +77,15 @@ class YOLOv3(object):
         if self.is_train:
             self.py_reader = fluid.layers.py_reader(
                 capacity=64,
-                shapes = [[-1] + self.image_shape, [-1, cfg.max_box_num, 4], [-1, cfg.max_box_num], [-1, cfg.max_box_num]],
+                shapes = [[-1] + self.image_shape,
+                          [-1, cfg.max_box_num, 4],
+                          [-1, cfg.max_box_num],
+                          [-1, cfg.max_box_num]],
                 lod_levels=[0, 0, 0, 0],
                 dtypes=['float32'] * 2 + ['int32'] + ['float32'],
                 use_double_buffer=True)
-            self.image, self.gtbox, self.gtlabel, self.gtscore = fluid.layers.read_file(self.py_reader)
+            self.image, self.gtbox, self.gtlabel, self.gtscore = \
+                fluid.layers.read_file(self.py_reader)
         else:
             self.image = fluid.layers.data(
                 name='image', shape=self.image_shape, dtype='float32'
@@ -139,9 +152,9 @@ class YOLOv3(object):
             if self.is_train:
                 loss = fluid.layers.yolov3_loss(
                     x=out,
-                    gtbox=self.gtbox,
-                    gtlabel=self.gtlabel,
-                    gtscore=self.gtscore,
+                    gt_box=self.gtbox,
+                    gt_label=self.gtlabel,
+                    gt_score=self.gtscore,
                     anchors=cfg.anchors,
                     anchor_mask=anchor_mask,
                     class_num=cfg.class_num,
......
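This hunk is the substance of the cherry-pick: `fluid.layers.yolov3_loss` renamed its ground-truth keywords from `gtbox`/`gtlabel`/`gtscore` to `gt_box`/`gt_label`/`gt_score`. A hypothetical compatibility shim (not part of this PR) for downstream code still passing the old names:

```python
import paddle.fluid as fluid

def yolov3_loss_compat(x, **kwargs):
    """Forward old gtbox/gtlabel/gtscore keywords to the renamed API."""
    renames = {'gtbox': 'gt_box', 'gtlabel': 'gt_label', 'gtscore': 'gt_score'}
    kwargs = {renames.get(k, k): v for k, v in kwargs.items()}
    return fluid.layers.yolov3_loss(x=x, **kwargs)
```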
@@ -53,13 +53,17 @@ class DataSetReader(object):
                 cfg.dataset))
 
         if mode == 'train':
-            cfg.train_file_list = os.path.join(cfg.data_dir, cfg.train_file_list)
-            cfg.train_data_dir = os.path.join(cfg.data_dir, cfg.train_data_dir)
+            cfg.train_file_list = os.path.join(cfg.data_dir,
+                                               cfg.train_file_list)
+            cfg.train_data_dir = os.path.join(cfg.data_dir,
+                                              cfg.train_data_dir)
             self.COCO = COCO(cfg.train_file_list)
             self.img_dir = cfg.train_data_dir
         elif mode == 'test' or mode == 'infer':
-            cfg.val_file_list = os.path.join(cfg.data_dir, cfg.val_file_list)
-            cfg.val_data_dir = os.path.join(cfg.data_dir, cfg.val_data_dir)
+            cfg.val_file_list = os.path.join(cfg.data_dir,
+                                             cfg.val_file_list)
+            cfg.val_data_dir = os.path.join(cfg.data_dir,
+                                            cfg.val_data_dir)
             self.COCO = COCO(cfg.val_file_list)
             self.img_dir = cfg.val_data_dir
@@ -88,7 +92,8 @@ class DataSetReader(object):
     def _parse_gt_annotations(self, img):
         img_height = img['height']
         img_width = img['width']
-        anno = self.COCO.loadAnns(self.COCO.getAnnIds(imgIds=img['id'], iscrowd=None))
+        anno = self.COCO.loadAnns(
+            self.COCO.getAnnIds(imgIds=img['id'], iscrowd=None))
         gt_index = 0
         for target in anno:
             if target['area'] < cfg.gt_min_area:
@@ -96,13 +101,15 @@ class DataSetReader(object):
             if 'ignore' in target and target['ignore']:
                 continue
 
-            box = box_utils.coco_anno_box_to_center_relative(target['bbox'], img_height, img_width)
+            box = box_utils.coco_anno_box_to_center_relative(
+                target['bbox'], img_height, img_width)
             if box[2] <= 0 and box[3] <= 0:
                 continue
 
             img['gt_id'][gt_index] = np.int32(target['id'])
             img['gt_boxes'][gt_index] = box
-            img['gt_labels'][gt_index] = self.category_to_id_map[target['category_id']]
+            img['gt_labels'][gt_index] = \
+                self.category_to_id_map[target['category_id']]
             gt_index += 1
             if gt_index >= cfg.max_box_num:
                 break
@@ -136,10 +143,18 @@ class DataSetReader(object):
         else:
             return self._parse_images(is_train=(mode=='train'))
 
-    def get_reader(self, mode, size=416, batch_size=None, shuffle=False, mixup_iter=0, random_sizes=[], image=None):
+    def get_reader(self,
+                   mode,
+                   size=416,
+                   batch_size=None,
+                   shuffle=False,
+                   mixup_iter=0,
+                   random_sizes=[],
+                   image=None):
         assert mode in ['train', 'test', 'infer'], "Unknown mode type!"
         if mode != 'infer':
-            assert batch_size is not None, "batch size cannot be None in mode {}".format(mode)
+            assert batch_size is not None, \
+                "batch size cannot be None in mode {}".format(mode)
         self._parse_dataset_dir(mode)
         self._parse_dataset_catagory()
@@ -151,7 +166,9 @@ class DataSetReader(object):
             h, w, _ = im.shape
             im_scale_x = size / float(w)
             im_scale_y = size / float(h)
-            out_img = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=cv2.INTER_CUBIC)
+            out_img = cv2.resize(im, None, None,
+                                 fx=im_scale_x, fy=im_scale_y,
+                                 interpolation=cv2.INTER_CUBIC)
             mean = np.array(mean).reshape((1, 1, -1))
             std = np.array(std).reshape((1, 1, -1))
             out_img = (out_img / 255.0 - mean) / std
@@ -173,11 +190,14 @@ class DataSetReader(object):
                 mixup_gt_boxes = np.array(mixup_img['gt_boxes']).copy()
                 mixup_gt_labels = np.array(mixup_img['gt_labels']).copy()
                 mixup_gt_scores = np.ones_like(mixup_gt_labels)
-                im, gt_boxes, gt_labels, gt_scores = image_utils.image_mixup(im, gt_boxes, \
-                    gt_labels, gt_scores, mixup_im, mixup_gt_boxes, mixup_gt_labels, \
-                    mixup_gt_scores)
+                im, gt_boxes, gt_labels, gt_scores = \
+                    image_utils.image_mixup(im, gt_boxes, gt_labels,
+                                            gt_scores, mixup_im, mixup_gt_boxes,
+                                            mixup_gt_labels, mixup_gt_scores)
 
-            im, gt_boxes, gt_labels, gt_scores = image_utils.image_augment(im, gt_boxes, gt_labels, gt_scores, size, mean)
+            im, gt_boxes, gt_labels, gt_scores = \
+                image_utils.image_augment(im, gt_boxes, gt_labels,
+                                          gt_scores, size, mean)
 
             mean = np.array(mean).reshape((1, 1, -1))
             std = np.array(std).reshape((1, 1, -1))
@@ -214,7 +234,9 @@ class DataSetReader(object):
                     read_cnt += 1
                     if read_cnt % len(imgs) == 0 and shuffle:
                         np.random.shuffle(imgs)
-                    im, gt_boxes, gt_labels, gt_scores = img_reader_with_augment(img, img_size, cfg.pixel_means, cfg.pixel_stds, mixup_img)
+                    im, gt_boxes, gt_labels, gt_scores = \
+                        img_reader_with_augment(img, img_size, cfg.pixel_means,
+                                                cfg.pixel_stds, mixup_img)
                     batch_out.append([im, gt_boxes, gt_labels, gt_scores])
 
                     if len(batch_out) == batch_size:
@@ -227,7 +249,9 @@ class DataSetReader(object):
             imgs = self._parse_images_by_mode(mode)
             batch_out = []
             for img in imgs:
-                im, im_id, im_shape = img_reader(img, size, cfg.pixel_means, cfg.pixel_stds)
+                im, im_id, im_shape = img_reader(img, size,
+                                                 cfg.pixel_means,
+                                                 cfg.pixel_stds)
                 batch_out.append((im, im_id, im_shape))
                 if len(batch_out) == batch_size:
                     yield batch_out
@@ -238,7 +262,9 @@ class DataSetReader(object):
             img = {}
             img['image'] = image
             img['id'] = 0
-            im, im_id, im_shape = img_reader(img, size, cfg.pixel_means, cfg.pixel_stds)
+            im, im_id, im_shape = img_reader(img, size,
+                                             cfg.pixel_means,
+                                             cfg.pixel_stds)
             batch_out = [(im, im_id, im_shape)]
             yield batch_out
@@ -256,7 +282,8 @@ def train(size=416,
           num_workers=8,
           max_queue=32,
           use_multiprocessing=True):
-    generator = dsr.get_reader('train', size, batch_size, shuffle, int(mixup_iter/num_workers), random_sizes)
+    generator = dsr.get_reader('train', size, batch_size, shuffle,
+                               int(mixup_iter/num_workers), random_sizes)
 
     if not use_multiprocessing:
         return generator
......
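The test/infer branch of `img_reader` above boils down to the following standalone preprocessing. A sketch only: the ImageNet-style mean/std values and the final HWC-to-CHW transpose are assumptions about what `cfg.pixel_means`/`cfg.pixel_stds` and the elided tail of the function contain, since neither appears in this hunk:

```python
import cv2
import numpy as np

def preprocess(im, size=608,
               mean=(0.485, 0.456, 0.406),    # assumed cfg.pixel_means
               std=(0.229, 0.224, 0.225)):    # assumed cfg.pixel_stds
    h, w, _ = im.shape
    # Square resize with bicubic interpolation, as in img_reader.
    out_img = cv2.resize(im, None, None, fx=size / float(w),
                         fy=size / float(h), interpolation=cv2.INTER_CUBIC)
    mean = np.array(mean).reshape((1, 1, -1))
    std = np.array(std).reshape((1, 1, -1))
    out_img = (out_img / 255.0 - mean) / std
    # Assumed: networks consume CHW float32 tensors.
    return out_img.transpose((2, 0, 1)).astype('float32')
```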
@@ -90,7 +90,13 @@ def train():
     total_iter = cfg.max_iter - cfg.start_iter
     mixup_iter = total_iter - cfg.no_mixup_iter
-    train_reader = reader.train(input_size, batch_size=cfg.batch_size, shuffle=True, total_iter=total_iter*devices_num, mixup_iter=mixup_iter*devices_num, random_sizes=random_sizes, use_multiprocessing=cfg.use_multiprocess)
+    train_reader = reader.train(input_size,
+                                batch_size=cfg.batch_size,
+                                shuffle=True,
+                                total_iter=total_iter*devices_num,
+                                mixup_iter=mixup_iter*devices_num,
+                                random_sizes=random_sizes,
+                                use_multiprocessing=cfg.use_multiprocess)
     py_reader = model.py_reader
     py_reader.decorate_paddle_reader(train_reader)
@@ -112,21 +118,25 @@ def train():
         for iter_id in range(cfg.start_iter, cfg.max_iter):
             prev_start_time = start_time
             start_time = time.time()
-            losses = exe.run(compile_program, fetch_list=[v.name for v in fetch_list])
+            losses = exe.run(compile_program,
+                             fetch_list=[v.name for v in fetch_list])
             smoothed_loss.add_value(np.mean(np.array(losses[0])))
             snapshot_loss += np.mean(np.array(losses[0]))
             snapshot_time += start_time - prev_start_time
             lr = np.array(fluid.global_scope().find_var('learning_rate')
                           .get_tensor())
-            print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
-                iter_id, lr[0],
-                smoothed_loss.get_mean_value(), start_time - prev_start_time))
+            print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
+                iter_id, lr[0],
+                smoothed_loss.get_mean_value(),
+                start_time - prev_start_time))
             sys.stdout.flush()
             if (iter_id + 1) % cfg.snapshot_iter == 0:
                 save_model("model_iter{}".format(iter_id))
-                print("Snapshot {} saved, average loss: {}, average time: {}".format(
-                    iter_id + 1, snapshot_loss / float(cfg.snapshot_iter),
-                    snapshot_time / float(cfg.snapshot_iter)))
+                print("Snapshot {} saved, average loss: {}, \
+average time: {}".format(
+                    iter_id + 1,
+                    snapshot_loss / float(cfg.snapshot_iter),
+                    snapshot_time / float(cfg.snapshot_iter)))
                 snapshot_loss = 0
                 snapshot_time = 0
     except fluid.core.EOFException:
......
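With the defaults from `parse_args()` below (`max_iter=500200`, `start_iter=0`, `no_mixup_iter=40000`), the schedule arithmetic at the top of this hunk works out as:

```python
max_iter, start_iter, no_mixup_iter = 500200, 0, 40000
total_iter = max_iter - start_iter        # 500200 iterations to run
mixup_iter = total_iter - no_mixup_iter   # mixup stays on for the first 460200
```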
@@ -101,27 +101,30 @@ def parse_args():
     add_arg('dataset', str, 'coco2017', "Dataset: coco2014, coco2017.")
     add_arg('class_num', int, 80, "Class number.")
     add_arg('data_dir', str, 'dataset/coco', "The data root path.")
     add_arg('start_iter', int, 0, "Start iteration.")
     add_arg('use_multiprocess', bool, True, "add multiprocess.")
     #SOLVER
     add_arg('batch_size', int, 8, "Mini-batch size per device.")
     add_arg('learning_rate', float, 0.001, "Learning rate.")
     add_arg('max_iter', int, 500200, "Iter number.")
     add_arg('snapshot_iter', int, 2000, "Save model every snapshot stride.")
     add_arg('label_smooth', bool, True, "Use label smooth in class label.")
     add_arg('no_mixup_iter', int, 40000, "Disable mixup in last N iter.")
     # TRAIN TEST INFER
     add_arg('input_size', int, 608, "Image input size of YOLOv3.")
     add_arg('random_shape', bool, True, "Resize to random shape for train reader.")
     add_arg('valid_thresh', float, 0.005, "Valid confidence score for NMS.")
     add_arg('nms_thresh', float, 0.45, "NMS threshold.")
     add_arg('nms_topk', int, 400, "The number of boxes to perform NMS.")
     add_arg('nms_posk', int, 100, "The number of boxes of NMS output.")
     add_arg('debug', bool, False, "Debug mode")
     # SINGLE EVAL AND DRAW
-    add_arg('image_path', str, 'image', "The image path used to inference and visualize.")
-    add_arg('image_name', str, None, "The single image used to inference and visualize. None to inference all images in image_path")
-    add_arg('draw_thresh', float, 0.5, "Confidence score threshold to draw prediction box in image in debug mode")
+    add_arg('image_path', str, 'image',
+            "The image path used to inference and visualize.")
+    add_arg('image_name', str, None,
+            "The single image used to inference and visualize. None to inference all images in image_path")
+    add_arg('draw_thresh', float, 0.5,
+            "Confidence score threshold to draw prediction box in image in debug mode")
     # yapf: enable
     args = parser.parse_args()
     file_name = sys.argv[0]
......
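`add_arg` is a thin wrapper over `argparse` from the repo's utility module. A minimal sketch of its likely shape (an assumption; the actual helper is not shown in this diff):

```python
import argparse
import distutils.util

parser = argparse.ArgumentParser(description=__doc__)

def add_arg(argname, type, default, help, **kwargs):
    # Booleans arrive as strings on the command line, so coerce them.
    type = distutils.util.strtobool if type == bool else type
    parser.add_argument('--' + argname, default=default, type=type,
                        help=help + ' Default: %(default)s.', **kwargs)

add_arg('draw_thresh', float, 0.5,
        "Confidence score threshold to draw prediction box in image in debug mode")
args = parser.parse_args()
```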