Commit 99e7dd5e authored by u010070587, committed by Kaipeng Deng

add yolov3 ce (#2312)

Parent 27730332
#!/bin/bash
# This file is only used for continuous evaluation.
# Single-card run: pipe the KPI log printed by train.py into _ce.py.
export CUDA_VISIBLE_DEVICES=0
python train.py --enable_ce True --use_multiprocess False --snapshot_iter 100 --max_iter 200 | python _ce.py
# 8-card run.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python train.py --enable_ce True --use_multiprocess False --snapshot_iter 100 --max_iter 200 | python _ce.py
@@ -62,7 +62,7 @@ The data catalog structure is as follows:
  │   ├── 000000000139.jpg
  │   ├── 000000000285.jpg
  |   ...
  ```

## Training
@@ -170,7 +170,7 @@ Inference speed(Tesla P40):

| input size | 608x608 | 416x416 | 320x320 |
|:-------------:| :-----: | :-----: | :-----: |
| infer speed | 48 ms/frame | 29 ms/frame | 24 ms/frame |

Visualization of infer result is shown as below:
@@ -181,4 +181,3 @@ Visualization of infer result is shown as below:
<img src="image/000000515077.png" height=300 width=400 hspace='10'/> <br />
YOLOv3 Visualization Examples
</p>
@@ -172,7 +172,7 @@ Train Loss

| input size | 608x608 | 416x416 | 320x320 |
|:-------------:| :-----: | :-----: | :-----: |
| infer speed | 48 ms/frame | 29 ms/frame | 24 ms/frame |

The visualization of the model's inference results is shown below:

<p align="center">
@@ -182,4 +182,3 @@ Train Loss
<img src="image/000000515077.png" height=300 width=400 hspace='10'/> <br />
YOLOv3 inference visualization
</p>
### This file is only used for continuous evaluation test!
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
train_cost_1card_kpi = CostKpi(
    'train_cost_1card', 0.02, 0, actived=True, desc='train cost')
train_duration_1card_kpi = DurationKpi(
    'train_duration_1card', 0.1, 0, actived=True, desc='train duration')
train_cost_8card_kpi = CostKpi(
    'train_cost_8card', 0.02, 0, actived=True, desc='train cost')
train_duration_8card_kpi = DurationKpi(
    'train_duration_8card', 0.1, 0, actived=True, desc='train duration')

tracking_kpis = [
    train_cost_1card_kpi, train_duration_1card_kpi, train_cost_8card_kpi,
    train_duration_8card_kpi
]


def parse_log(log):
    # Each KPI line printed by train.py has the form "kpis\t<name>\t<value>".
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    # Map each tracked KPI by name, then record and persist every parsed value.
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    log_to_ce(log)
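For reference, a minimal sketch of the log format this script expects on stdin: train.py (run with `--enable_ce`) prints tab-separated lines starting with `kpis`, and `parse_log` keeps only those. The sample values below are invented for illustration.

```python
# Hypothetical excerpt of what train.py pipes into _ce.py; the numbers are made up.
sample_log = ("Iter 199, lr 0.000100, loss 150.000000, time 0.50000\n"
              "kpis\ttrain_cost_1card\t150.000000\n"
              "kpis\ttrain_duration_1card\t0.50000\n")

# parse_log keeps only tab-separated lines whose first field is "kpis".
print(list(parse_log(sample_log)))
# [('train_cost_1card', 150.0), ('train_duration_1card', 0.5)]
```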
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -47,6 +46,7 @@ def coco_anno_box_to_center_relative(box, img_height, img_width):
    return np.array([x, y, w, h])


def clip_relative_box_in_image(x, y, w, h):
    """Clip relative box coordinates x, y, w, h to [0, 1]"""
    x1 = max(x - w / 2, 0.)
@@ -58,6 +58,7 @@ def clip_relative_box_in_image(x, y, w, h):
    w = x2 - x1
    h = y2 - y1


def box_xywh_to_xyxy(box):
    shape = box.shape
    assert shape[-1] == 4, "Box shape[-1] should be 4."
@@ -68,6 +69,7 @@ def box_xywh_to_xyxy(box):
    box = box.reshape(shape)
    return box


def box_iou_xywh(box1, box2):
    assert box1.shape[-1] == 4, "Box1 shape[-1] should be 4."
    assert box2.shape[-1] == 4, "Box2 shape[-1] should be 4."
@@ -92,6 +94,7 @@ def box_iou_xywh(box1, box2):
    return inter_area / (b1_area + b2_area - inter_area)


def box_iou_xyxy(box1, box2):
    assert box1.shape[-1] == 4, "Box1 shape[-1] should be 4."
    assert box2.shape[-1] == 4, "Box2 shape[-1] should be 4."
@@ -114,17 +117,21 @@ def box_iou_xyxy(box1, box2):
    return inter_area / (b1_area + b2_area - inter_area)


def box_crop(boxes, labels, scores, crop, img_shape):
    x, y, w, h = map(float, crop)
    im_w, im_h = map(float, img_shape)

    boxes = boxes.copy()
    boxes[:, 0], boxes[:, 2] = (boxes[:, 0] - boxes[:, 2] / 2) * im_w, (
        boxes[:, 0] + boxes[:, 2] / 2) * im_w
    boxes[:, 1], boxes[:, 3] = (boxes[:, 1] - boxes[:, 3] / 2) * im_h, (
        boxes[:, 1] + boxes[:, 3] / 2) * im_h

    crop_box = np.array([x, y, x + w, y + h])
    centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
    mask = np.logical_and(crop_box[:2] <= centers, centers <= crop_box[2:]).all(
        axis=1)

    boxes[:, :2] = np.maximum(boxes[:, :2], crop_box[:2])
    boxes[:, 2:] = np.minimum(boxes[:, 2:], crop_box[2:])
@@ -135,12 +142,20 @@ def box_crop(boxes, labels, scores, crop, img_shape):
    boxes = boxes * np.expand_dims(mask.astype('float32'), axis=1)
    labels = labels * mask.astype('float32')
    scores = scores * mask.astype('float32')
    boxes[:, 0], boxes[:, 2] = (boxes[:, 0] + boxes[:, 2]) / 2 / w, (
        boxes[:, 2] - boxes[:, 0]) / w
    boxes[:, 1], boxes[:, 3] = (boxes[:, 1] + boxes[:, 3]) / 2 / h, (
        boxes[:, 3] - boxes[:, 1]) / h

    return boxes, labels, scores, mask.sum()


def draw_boxes_on_image(image_path,
                        boxes,
                        scores,
                        labels,
                        label_names,
                        score_thresh=0.5):
    image = np.array(Image.open(image_path))
    plt.figure()
    _, ax = plt.subplots(1)
@@ -158,22 +173,33 @@ def draw_boxes_on_image(image_path, boxes, scores, labels, label_names, score_th
        if label not in colors:
            colors[label] = plt.get_cmap('hsv')(label / len(label_names))
        x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
        rect = plt.Rectangle(
            (x1, y1),
            x2 - x1,
            y2 - y1,
            fill=False,
            linewidth=2.0,
            edgecolor=colors[label])
        ax.add_patch(rect)
        ax.text(
            x1,
            y1,
            '{} {:.4f}'.format(label_names[label], score),
            verticalalignment='bottom',
            horizontalalignment='left',
            bbox={'facecolor': colors[label],
                  'alpha': 0.5,
                  'pad': 0},
            fontsize=8,
            color='white')
        print("\t {:15s} at {:25} score: {:.5f}".format(label_names[int(
            label)], str(list(map(int, list(box)))), score))
    image_name = image_name.replace('jpg', 'png')
    plt.axis('off')
    plt.gca().xaxis.set_major_locator(plt.NullLocator())
    plt.gca().yaxis.set_major_locator(plt.NullLocator())
    plt.savefig(
        "./output/{}".format(image_name), bbox_inches='tight', pad_inches=0.0)
    print("Detect result save at ./output/{}\n".format(image_name))
    plt.cla()
    plt.close('all')
@@ -33,7 +33,6 @@ _C.gt_min_area = -1
# max target box number in an image
_C.max_box_num = 50

#
# Training options
#
@@ -53,7 +52,6 @@ _C.nms_posk = 100
# score threshold for draw box in debug mode
_C.draw_thresh = 0.5

#
# Model options
#
@@ -65,7 +63,9 @@ _C.pixel_means = [0.485, 0.456, 0.406]
_C.pixel_stds = [0.229, 0.224, 0.225]

# anchor box width and height
_C.anchors = [
    10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326
]

# anchor mask of each yolo layer
_C.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
@@ -73,7 +73,6 @@ _C.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
# IoU threshold to ignore objectness loss of pred box
_C.ignore_thresh = .7

#
# SOLVER options
#
...
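As a quick aside, a small sketch of how each entry of `_C.anchor_masks` selects (width, height) pairs from the flat `_C.anchors` list for one YOLO output layer, mirroring the `mask_anchors` loop in the model code further down; the printed pairs follow directly from the two lists above.

```python
# The flat anchor list stores width/height pairs back to back.
anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

for i, mask in enumerate(anchor_masks):
    # Mask index m selects the pair (anchors[2 * m], anchors[2 * m + 1]).
    pairs = [(anchors[2 * m], anchors[2 * m + 1]) for m in mask]
    print("yolo layer {}: {}".format(i, pairs))
# yolo layer 0: [(116, 90), (156, 198), (373, 326)]
# yolo layer 1: [(30, 61), (62, 45), (59, 119)]
# yolo layer 2: [(10, 13), (16, 30), (33, 23)]
```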
@@ -64,12 +64,12 @@ def eval():
            w = x2 - x1 + 1
            h = y2 - y1 + 1
            bbox = [x1, y1, w, h]

            res = {
                'image_id': im_id,
                'category_id': label_ids[int(label)],
                'bbox': list(map(float, bbox)),
                'score': float(score)
            }
            result.append(res)
        return result
@@ -79,11 +79,10 @@ def eval():
    total_time = 0
    for batch_id, batch_data in enumerate(test_reader()):
        start_time = time.time()
        batch_outputs = exe.run(fetch_list=[v.name for v in fetch_list],
                                feed=feeder.feed(batch_data),
                                return_numpy=False,
                                use_program_cache=True)
        lod = batch_outputs[0].lod()[0]
        nmsed_boxes = np.array(batch_outputs[0])
        if nmsed_boxes.shape[1] != 6:
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -30,46 +29,41 @@ def random_distort(img):
    def random_brightness(img, lower=0.5, upper=1.5):
        e = np.random.uniform(lower, upper)
        return ImageEnhance.Brightness(img).enhance(e)

    def random_contrast(img, lower=0.5, upper=1.5):
        e = np.random.uniform(lower, upper)
        return ImageEnhance.Contrast(img).enhance(e)

    def random_color(img, lower=0.5, upper=1.5):
        e = np.random.uniform(lower, upper)
        return ImageEnhance.Color(img).enhance(e)

    ops = [random_brightness, random_contrast, random_color]
    np.random.shuffle(ops)

    img = Image.fromarray(img)
    img = ops[0](img)
    img = ops[1](img)
    img = ops[2](img)
    img = np.asarray(img)

    return img


def random_crop(img,
                boxes,
                labels,
                scores,
                scales=[0.3, 1.0],
                max_ratio=2.0,
                constraints=None,
                max_trial=50):
    if len(boxes) == 0:
        return img, boxes

    if not constraints:
        constraints = [(0.1, 1.0), (0.3, 1.0), (0.5, 1.0), (0.7, 1.0),
                       (0.9, 1.0), (0.0, 1.0)]

    img = Image.fromarray(img)
    w, h = img.size
@@ -83,12 +77,9 @@ def random_crop(img,
            crop_w = int(w * scale * np.sqrt(aspect_ratio))
            crop_x = random.randrange(w - crop_w)
            crop_y = random.randrange(h - crop_h)
            crop_box = np.array([[(crop_x + crop_w / 2.0) / w,
                                  (crop_y + crop_h / 2.0) / h,
                                  crop_w / float(w), crop_h / float(h)]])

            iou = box_utils.box_iou_xywh(crop_box, boxes)
            if min_iou <= iou.min() and max_iou >= iou.max():
@@ -101,19 +92,21 @@ def random_crop(img,
            box_utils.box_crop(boxes, labels, scores, crop, (w, h))
        if box_num < 1:
            continue

        img = img.crop((crop[0], crop[1], crop[0] + crop[2],
                        crop[1] + crop[3])).resize(img.size, Image.LANCZOS)
        img = np.asarray(img)
        return img, crop_boxes, crop_labels, crop_scores

    img = np.asarray(img)
    return img, boxes, labels, scores


def random_flip(img, gtboxes, thresh=0.5):
    if random.random() > thresh:
        img = img[:, ::-1, :]
        gtboxes[:, 0] = 1.0 - gtboxes[:, 0]
    return img, gtboxes


def random_interp(img, size, interp=None):
    interp_method = [
        cv2.INTER_NEAREST,
@@ -121,28 +114,29 @@ def random_interp(img, size, interp=None):
        cv2.INTER_AREA,
        cv2.INTER_CUBIC,
        cv2.INTER_LANCZOS4,
    ]
    if not interp or interp not in interp_method:
        interp = interp_method[random.randint(0, len(interp_method) - 1)]
    h, w, _ = img.shape
    im_scale_x = size / float(w)
    im_scale_y = size / float(h)
    img = cv2.resize(
        img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=interp)
    return img


def random_expand(img,
                  gtboxes,
                  max_ratio=4.,
                  fill=None,
                  keep_ratio=True,
                  thresh=0.5):
    if random.random() > thresh:
        return img, gtboxes

    if max_ratio < 1.0:
        return img, gtboxes

    h, w, c = img.shape
    ratio_x = random.uniform(1, max_ratio)
    if keep_ratio:
@@ -151,15 +145,15 @@ def random_expand(img,
        ratio_y = random.uniform(1, max_ratio)
    oh = int(h * ratio_y)
    ow = int(w * ratio_x)
    off_x = random.randint(0, ow - w)
    off_y = random.randint(0, oh - h)

    out_img = np.zeros((oh, ow, c))
    if fill and len(fill) == c:
        for i in range(c):
            out_img[:, :, i] = fill[i] * 255.0

    out_img[off_y:off_y + h, off_x:off_x + w, :] = img
    gtboxes[:, 0] = ((gtboxes[:, 0] * w) + off_x) / float(ow)
    gtboxes[:, 1] = ((gtboxes[:, 1] * h) + off_y) / float(oh)
    gtboxes[:, 2] = gtboxes[:, 2] / ratio_x
@@ -167,21 +161,17 @@ def random_expand(img,
    return out_img.astype('uint8'), gtboxes


def shuffle_gtbox(gtbox, gtlabel, gtscore):
    gt = np.concatenate(
        [gtbox, gtlabel[:, np.newaxis], gtscore[:, np.newaxis]], axis=1)
    idx = np.arange(gt.shape[0])
    np.random.shuffle(idx)
    gt = gt[idx, :]
    return gt[:, :4], gt[:, 4], gt[:, 5]


def image_mixup(img1, gtboxes1, gtlabels1, gtscores1, img2, gtboxes2, gtlabels2,
                gtscores2):
    factor = np.random.beta(1.5, 1.5)
    factor = max(0.0, min(1.0, factor))
@@ -229,7 +219,8 @@ def image_mixup(img1,
    gtscores[:gt_num] = gtscores_all[:gt_num]
    return img.astype('uint8'), gtboxes, gtlabels, gtscores


def image_augment(img, gtboxes, gtlabels, gtscores, size, means=None):
    img = random_distort(img)
    img, gtboxes = random_expand(img, gtboxes, fill=means)
    img, gtboxes, gtlabels, gtscores = \
@@ -240,4 +231,3 @@ def image_augment(img, gtboxes, gtlabels, gtscores, size, means=None):
    return img.astype('float32'), gtboxes.astype('float32'), \
           gtlabels.astype('int32'), gtscores.astype('float32')
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import time
import numpy as np
@@ -54,14 +53,14 @@ def infer():
            if image_name.split('.')[-1] in ['jpg', 'png']:
                image_names.append(image_name)
    for image_name in image_names:
        infer_reader = reader.infer(input_size,
                                    os.path.join(cfg.image_path, image_name))
        label_names, _ = reader.get_label_infos()
        data = next(infer_reader())
        im_shape = data[0][2]
        outputs = exe.run(fetch_list=[v.name for v in fetch_list],
                          feed=feeder.feed(data),
                          return_numpy=False)
        bboxes = np.array(outputs[0])
        if bboxes.shape[1] != 6:
            print("No object found in {}".format(image_name))
@@ -71,7 +70,8 @@ def infer():
        boxes = bboxes[:, 2:].astype('float32')
        path = os.path.join(cfg.image_path, image_name)
        box_utils.draw_boxes_on_image(path, boxes, scores, labels, label_names,
                                      cfg.draw_thresh)


if __name__ == '__main__':
...
@@ -17,6 +17,7 @@ from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from paddle.fluid.regularizer import L2Decay


def conv_bn_layer(input,
                  ch_out,
                  filter_size,
@@ -32,8 +33,9 @@ def conv_bn_layer(input,
        stride=stride,
        padding=padding,
        act=None,
        param_attr=ParamAttr(
            initializer=fluid.initializer.Normal(0., 0.02),
            name=name + ".conv.weights"),
        bias_attr=False)

    bn_name = name + ".bn"
@@ -42,72 +44,88 @@ def conv_bn_layer(input,
        act=None,
        is_test=is_test,
        param_attr=ParamAttr(
            initializer=fluid.initializer.Normal(0., 0.02),
            regularizer=L2Decay(0.),
            name=bn_name + '.scale'),
        bias_attr=ParamAttr(
            initializer=fluid.initializer.Constant(0.0),
            regularizer=L2Decay(0.),
            name=bn_name + '.offset'),
        moving_mean_name=bn_name + '.mean',
        moving_variance_name=bn_name + '.var')

    if act == 'leaky':
        out = fluid.layers.leaky_relu(x=out, alpha=0.1)
    return out


def downsample(input,
               ch_out,
               filter_size=3,
               stride=2,
               padding=1,
               is_test=True,
               name=None):
    return conv_bn_layer(
        input,
        ch_out=ch_out,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        is_test=is_test,
        name=name)


def basicblock(input, ch_out, is_test=True, name=None):
    conv1 = conv_bn_layer(
        input, ch_out, 1, 1, 0, is_test=is_test, name=name + ".0")
    conv2 = conv_bn_layer(
        conv1, ch_out * 2, 3, 1, 1, is_test=is_test, name=name + ".1")
    out = fluid.layers.elementwise_add(x=input, y=conv2, act=None)
    return out


def layer_warp(block_func, input, ch_out, count, is_test=True, name=None):
    res_out = block_func(
        input, ch_out, is_test=is_test, name='{}.0'.format(name))
    for j in range(1, count):
        res_out = block_func(
            res_out, ch_out, is_test=is_test, name='{}.{}'.format(name, j))
    return res_out


DarkNet_cfg = {53: ([1, 2, 8, 8, 4], basicblock)}


def add_DarkNet53_conv_body(body_input, is_test=True):
    stages, block_func = DarkNet_cfg[53]
    stages = stages[0:5]
    conv1 = conv_bn_layer(
        body_input,
        ch_out=32,
        filter_size=3,
        stride=1,
        padding=1,
        is_test=is_test,
        name="yolo_input")
    downsample_ = downsample(
        conv1,
        ch_out=conv1.shape[1] * 2,
        is_test=is_test,
        name="yolo_input.downsample")
    blocks = []
    for i, stage in enumerate(stages):
        block = layer_warp(
            block_func,
            downsample_,
            32 * (2**i),
            stage,
            is_test=is_test,
            name="stage.{}".format(i))
        blocks.append(block)
        if i < len(stages) - 1:  # do not downsample in the last stage
            downsample_ = downsample(
                block,
                ch_out=block.shape[1] * 2,
                is_test=is_test,
                name="stage.{}.downsample".format(i))
    return blocks[-1:-4:-1]
@@ -26,26 +26,48 @@ from config import cfg
from .darknet import add_DarkNet53_conv_body
from .darknet import conv_bn_layer


def yolo_detection_block(input, channel, is_test=True, name=None):
    assert channel % 2 == 0, \
        "channel {} cannot be divided by 2".format(channel)
    conv = input
    for j in range(2):
        conv = conv_bn_layer(
            conv,
            channel,
            filter_size=1,
            stride=1,
            padding=0,
            is_test=is_test,
            name='{}.{}.0'.format(name, j))
        conv = conv_bn_layer(
            conv,
            channel * 2,
            filter_size=3,
            stride=1,
            padding=1,
            is_test=is_test,
            name='{}.{}.1'.format(name, j))
    route = conv_bn_layer(
        conv,
        channel,
        filter_size=1,
        stride=1,
        padding=0,
        is_test=is_test,
        name='{}.2'.format(name))
    tip = conv_bn_layer(
        route,
        channel * 2,
        filter_size=3,
        stride=1,
        padding=1,
        is_test=is_test,
        name='{}.tip'.format(name))
    return route, tip


def upsample(input, scale=2, name=None):
    # get dynamic upsample output shape
    shape_nchw = fluid.layers.shape(input)
    shape_hw = fluid.layers.slice(shape_nchw, axes=[0], starts=[2], ends=[4])
@@ -56,16 +78,12 @@ def upsample(input, scale=2,name=None):
    # resize by actual_shape
    out = fluid.layers.resize_nearest(
        input=input, scale=scale, actual_shape=out_shape, name=name)
    return out


class YOLOv3(object):
    def __init__(self, is_train=True, use_random=True):
        self.is_train = is_train
        self.use_random = use_random
        self.outputs = []
@@ -77,10 +95,8 @@ class YOLOv3(object):
        if self.is_train:
            self.py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + self.image_shape, [-1, cfg.max_box_num, 4],
                        [-1, cfg.max_box_num], [-1, cfg.max_box_num]],
                lod_levels=[0, 0, 0, 0],
                dtypes=['float32'] * 2 + ['int32'] + ['float32'],
                use_double_buffer=True)
@@ -88,13 +104,12 @@ class YOLOv3(object):
            fluid.layers.read_file(self.py_reader)
        else:
            self.image = fluid.layers.data(
                name='image', shape=self.image_shape, dtype='float32')
            self.im_shape = fluid.layers.data(
                name="im_shape", shape=[2], dtype='int32')
            self.im_id = fluid.layers.data(
                name="im_id", shape=[1], dtype='int32')

    def feeds(self):
        if not self.is_train:
            return [self.image, self.im_id, self.im_shape]
@@ -110,12 +125,12 @@ class YOLOv3(object):
        blocks = add_DarkNet53_conv_body(self.image, not self.is_train)
        for i, block in enumerate(blocks):
            if i > 0:
                block = fluid.layers.concat(input=[route, block], axis=1)
            route, tip = yolo_detection_block(
                block,
                channel=512 // (2**i),
                is_test=(not self.is_train),
                name="yolo_block.{}".format(i))

            # out channel number = mask_num * (5 + class_num)
            num_filters = len(cfg.anchor_masks[i]) * (cfg.class_num + 5)
@@ -126,17 +141,19 @@ class YOLOv3(object):
                stride=1,
                padding=0,
                act=None,
                param_attr=ParamAttr(
                    initializer=fluid.initializer.Normal(0., 0.02),
                    name="yolo_output.{}.conv.weights".format(i)),
                bias_attr=ParamAttr(
                    initializer=fluid.initializer.Constant(0.0),
                    regularizer=L2Decay(0.),
                    name="yolo_output.{}.conv.bias".format(i)))
            self.outputs.append(block_out)

            if i < len(blocks) - 1:
                route = conv_bn_layer(
                    input=route,
                    ch_out=256 // (2**i),
                    filter_size=1,
                    stride=1,
                    padding=0,
@@ -145,42 +162,42 @@ class YOLOv3(object):
                # upsample
                route = upsample(route)

        for i, out in enumerate(self.outputs):
            anchor_mask = cfg.anchor_masks[i]
            if self.is_train:
                loss = fluid.layers.yolov3_loss(
                    x=out,
                    gt_box=self.gtbox,
                    gt_label=self.gtlabel,
                    gt_score=self.gtscore,
                    anchors=cfg.anchors,
                    anchor_mask=anchor_mask,
                    class_num=cfg.class_num,
                    ignore_thresh=cfg.ignore_thresh,
                    downsample_ratio=self.downsample,
                    use_label_smooth=cfg.label_smooth,
                    name="yolo_loss" + str(i))
                self.losses.append(fluid.layers.reduce_mean(loss))
            else:
                mask_anchors = []
                for m in anchor_mask:
                    mask_anchors.append(cfg.anchors[2 * m])
                    mask_anchors.append(cfg.anchors[2 * m + 1])
                boxes, scores = fluid.layers.yolo_box(
                    x=out,
                    img_size=self.im_shape,
                    anchors=mask_anchors,
                    class_num=cfg.class_num,
                    conf_thresh=cfg.valid_thresh,
                    downsample_ratio=self.downsample,
                    name="yolo_box" + str(i))
                self.boxes.append(boxes)
                self.scores.append(
                    fluid.layers.transpose(
                        scores, perm=[0, 2, 1]))

            self.downsample //= 2

    def loss(self):
        return sum(self.losses)
@@ -189,12 +206,11 @@ class YOLOv3(object):
        yolo_boxes = fluid.layers.concat(self.boxes, axis=1)
        yolo_scores = fluid.layers.concat(self.scores, axis=2)
        return fluid.layers.multiclass_nms(
            bboxes=yolo_boxes,
            scores=yolo_scores,
            score_threshold=cfg.valid_thresh,
            nms_top_k=cfg.nms_topk,
            keep_top_k=cfg.nms_posk,
            nms_threshold=cfg.nms_thresh,
            background_label=-1,
            name="multiclass_nms")
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -53,21 +52,17 @@ class DataSetReader(object):
                cfg.dataset))

        if mode == 'train':
            cfg.train_file_list = os.path.join(cfg.data_dir,
                                               cfg.train_file_list)
            cfg.train_data_dir = os.path.join(cfg.data_dir, cfg.train_data_dir)
            self.COCO = COCO(cfg.train_file_list)
            self.img_dir = cfg.train_data_dir
        elif mode == 'test' or mode == 'infer':
            cfg.val_file_list = os.path.join(cfg.data_dir, cfg.val_file_list)
            cfg.val_data_dir = os.path.join(cfg.data_dir, cfg.val_data_dir)
            self.COCO = COCO(cfg.val_file_list)
            self.img_dir = cfg.val_data_dir

    def _parse_dataset_catagory(self):
        self.categories = self.COCO.loadCats(self.COCO.getCatIds())
        self.num_category = len(self.categories)
@@ -76,10 +71,7 @@ class DataSetReader(object):
        for category in self.categories:
            self.label_names.append(category['name'])
            self.label_ids.append(int(category['id']))
        self.category_to_id_map = {v: i for i, v in enumerate(self.label_ids)}
        print("Load in {} categories.".format(self.num_category))
        self.has_parsed_categpry = True
@@ -93,7 +85,8 @@ class DataSetReader(object):
            img_height = img['height']
            img_width = img['width']
            anno = self.COCO.loadAnns(
                self.COCO.getAnnIds(
                    imgIds=img['id'], iscrowd=None))
            gt_index = 0
            for target in anno:
                if target['area'] < cfg.gt_min_area:
@@ -102,7 +95,7 @@ class DataSetReader(object):
                    continue

                box = box_utils.coco_anno_box_to_center_relative(
                    target['bbox'], img_height, img_width)
                if box[2] <= 0 and box[3] <= 0:
                    continue
@@ -141,15 +134,15 @@ class DataSetReader(object):
        if mode == 'infer':
            return []
        else:
            return self._parse_images(is_train=(mode == 'train'))

    def get_reader(self,
                   mode,
                   size=416,
                   batch_size=None,
                   shuffle=False,
                   mixup_iter=0,
                   random_sizes=[],
                   image=None):
        assert mode in ['train', 'test', 'infer'], "Unknow mode type!"
        if mode != 'infer':
@@ -166,9 +159,13 @@ class DataSetReader(object):
            h, w, _ = im.shape
            im_scale_x = size / float(w)
            im_scale_y = size / float(h)
            out_img = cv2.resize(
                im,
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=cv2.INTER_CUBIC)
            mean = np.array(mean).reshape((1, 1, -1))
            std = np.array(std).reshape((1, 1, -1))
            out_img = (out_img / 255.0 - mean) / std
@@ -191,12 +188,12 @@ class DataSetReader(object):
                mixup_gt_labels = np.array(mixup_img['gt_labels']).copy()
                mixup_gt_scores = np.ones_like(mixup_gt_labels)
                im, gt_boxes, gt_labels, gt_scores = \
                    image_utils.image_mixup(im, gt_boxes, gt_labels,
                                            gt_scores, mixup_im, mixup_gt_boxes,
                                            mixup_gt_labels, mixup_gt_scores)

            im, gt_boxes, gt_labels, gt_scores = \
                image_utils.image_augment(im, gt_boxes, gt_labels,
                                          gt_scores, size, mean)
            mean = np.array(mean).reshape((1, 1, -1))
@@ -230,12 +227,13 @@ class DataSetReader(object):
                img_size = get_img_size(size, random_sizes)
                while True:
                    img = imgs[read_cnt % len(imgs)]
                    mixup_img = get_mixup_img(imgs, mixup_iter, total_iter,
                                              read_cnt)
                    read_cnt += 1
                    if read_cnt % len(imgs) == 0 and shuffle:
                        np.random.shuffle(imgs)
                    im, gt_boxes, gt_labels, gt_scores = \
                        img_reader_with_augment(img, img_size, cfg.pixel_means,
                                                cfg.pixel_stds, mixup_img)
                    batch_out.append([im, gt_boxes, gt_labels, gt_scores])
@@ -249,8 +247,7 @@ class DataSetReader(object):
                imgs = self._parse_images_by_mode(mode)
                batch_out = []
                for img in imgs:
                    im, im_id, im_shape = img_reader(img, size, cfg.pixel_means,
                                                     cfg.pixel_stds)
                    batch_out.append((im, im_id, im_shape))
                    if len(batch_out) == batch_size:
@@ -262,8 +259,7 @@ class DataSetReader(object):
                img = {}
                img['image'] = image
                img['id'] = 0
                im, im_id, im_shape = img_reader(img, size, cfg.pixel_means,
                                                 cfg.pixel_stds)
                batch_out = [(im, im_id, im_shape)]
                yield batch_out
@@ -273,17 +269,18 @@ class DataSetReader(object):
dsr = DataSetReader()


def train(size=416,
          batch_size=64,
          shuffle=True,
          total_iter=0,
          mixup_iter=0,
          random_sizes=[],
          num_workers=8,
          max_queue=32,
          use_multiprocessing=True):
    generator = dsr.get_reader('train', size, batch_size, shuffle,
                               int(mixup_iter / num_workers), random_sizes)

    if not use_multiprocessing:
        return generator
@@ -316,15 +313,17 @@ def train(size=416,
        finally:
            if enqueuer is not None:
                enqueuer.stop()

    return reader


def test(size=416, batch_size=1):
    return dsr.get_reader('test', size, batch_size)


def infer(size=416, image=None):
    return dsr.get_reader('infer', size, image=image)


def get_label_infos():
    return dsr.get_label_infos()
@@ -33,12 +33,12 @@ from config import cfg


def train():
    if cfg.debug or args.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    if not os.path.exists(cfg.model_save_dir):
        os.makedirs(cfg.model_save_dir)
@@ -76,16 +76,18 @@ def train():
    if cfg.pretrain:
        if not os.path.exists(cfg.pretrain):
            print("Pretrain weights not found: {}".format(cfg.pretrain))

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrain, var.name))

        fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = True
    build_strategy.sync_batch_norm = cfg.syncbn
    compile_program = fluid.compiler.CompiledProgram(fluid.default_main_program(
    )).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    random_sizes = [cfg.input_size]
    if cfg.random_shape:
@@ -93,13 +95,17 @@ def train():
    total_iter = cfg.max_iter - cfg.start_iter
    mixup_iter = total_iter - cfg.no_mixup_iter

    shuffle = True
    if args.enable_ce:
        shuffle = False
    train_reader = reader.train(
        input_size,
        batch_size=cfg.batch_size,
        shuffle=shuffle,
        total_iter=total_iter * devices_num,
        mixup_iter=mixup_iter * devices_num,
        random_sizes=random_sizes,
        use_multiprocessing=cfg.use_multiprocess)
    py_reader = model.py_reader
    py_reader.decorate_paddle_reader(train_reader)
@@ -121,7 +127,7 @@ def train():
        for iter_id in range(cfg.start_iter, cfg.max_iter):
            prev_start_time = start_time
            start_time = time.time()
            losses = exe.run(compile_program,
                             fetch_list=[v.name for v in fetch_list])
            smoothed_loss.add_value(np.mean(np.array(losses[0])))
            snapshot_loss += np.mean(np.array(losses[0]))
@@ -129,17 +135,27 @@ def train():
            lr = np.array(fluid.global_scope().find_var('learning_rate')
                          .get_tensor())
            print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
                iter_id, lr[0],
                smoothed_loss.get_mean_value(), start_time - prev_start_time))
            sys.stdout.flush()
            if (iter_id + 1) % cfg.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
                print("Snapshot {} saved, average loss: {}, \
                      average time: {}".format(
                    iter_id + 1, snapshot_loss / float(cfg.snapshot_iter),
                    snapshot_time / float(cfg.snapshot_iter)))
                if args.enable_ce and iter_id == cfg.max_iter - 1:
                    if devices_num == 1:
                        print("kpis\ttrain_cost_1card\t%f" %
                              (snapshot_loss / float(cfg.snapshot_iter)))
                        print("kpis\ttrain_duration_1card\t%f" %
                              (snapshot_time / float(cfg.snapshot_iter)))
                    else:
                        print("kpis\ttrain_cost_8card\t%f" %
                              (snapshot_loss / float(cfg.snapshot_iter)))
                        print("kpis\ttrain_duration_8card\t%f" %
                              (snapshot_time / float(cfg.snapshot_iter)))
                snapshot_loss = 0
                snapshot_time = 0
    except fluid.core.EOFException:
...
@@ -120,12 +120,13 @@ def parse_args():
    add_arg('nms_posk', int, 100, "The number of boxes of NMS output.")
    add_arg('debug', bool, False, "Debug mode")
    # SINGLE EVAL AND DRAW
    add_arg('image_path', str, 'image',
            "The image path used to inference and visualize.")
    add_arg('image_name', str, None,
            "The single image used to inference and visualize. None to inference all images in image_path")
    add_arg('draw_thresh', float, 0.5,
            "Confidence score threshold to draw prediction box in image in debug mode")
    add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.")
    # yapf: enable
    args = parser.parse_args()
    file_name = sys.argv[0]
...