Commit b2389e38 authored by: D dengkaipeng

fit for train.

Parent 9da63d61
...
@@ -24,10 +24,6 @@ cfg = _C
 # Training options
 #
 
-# batch
-_C.batch = 8
-
 # Snapshot period
 _C.snapshot_iter = 2000
 
@@ -72,6 +68,9 @@ _C.pixel_stds = [0.229, 0.224, 0.225]
 # SOLVER options
 #
 
+# batch size
+_C.batch_size = 64
+
 # derived learning rate the to get the final learning rate.
 _C.learning_rate = 0.001
 
@@ -92,9 +91,7 @@ _C.weight_decay = 0.0005
 # momentum with SGD
 _C.momentum = 0.9
 
-# decay
-_C.decay = 0.0005
 #
 # ENV options
 #
...
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
 #Licensed under the Apache License, Version 2.0 (the "License");
 #you may not use this file except in compliance with the License.
 #You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 #Unless required by applicable law or agreed to in writing, software
 #distributed under the License is distributed on an "AS IS" BASIS,
 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #See the License for the specific language governing permissions and
 #limitations under the License.
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
 
-LAYER_TYPES = [
-    "net",
-    "convolutional",
-    "shortcut",
-    "route",
-    "upsample",
-    "maxpool",
-    "yolo",
-]
-
-class ConfigPaser(object):
-    def __init__(self, config_path):
-        self.config_path = config_path
-
-    def parse(self):
-        with open(self.config_path) as cfg_file:
-            model_defs = []
-            for line in cfg_file.readlines():
-                line = line.strip()
-                if len(line) == 0:
-                    continue
-                if line.startswith('#'):
-                    continue
-                if line.startswith('['):
-                    layer_type = line[1:-1].strip()
-                    if layer_type not in LAYER_TYPES:
-                        print("Unknow config layer type: ", layer_type)
-                        return None
-                    model_defs.append({})
-                    model_defs[-1]['type'] = layer_type
-                else:
-                    key, value = line.split('=')
-                    model_defs[-1][key.strip()] = value.strip()
-
-            return model_defs
+class AttrDict(dict):
+    def __init__(self, *args, **kwargs):
+        super(AttrDict, self).__init__(*args, **kwargs)
+
+    def __getattr__(self, name):
+        if name in self.__dict__:
+            return self.__dict__[name]
+        elif name in self:
+            return self[name]
+        else:
+            raise AttributeError(name)
+
+    def __setattr__(self, name, value):
+        if name in self.__dict__:
+            self.__dict__[name] = value
+        else:
+            self[name] = value
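A brief usage sketch of the `AttrDict` added above (illustrative only, not part of the commit): attribute-style reads and writes fall through to the underlying dict, which is how `config.py` exposes options such as `cfg.batch_size`.

```python
# Usage sketch; assumes the AttrDict class shown above is in scope.
_C = AttrDict()

_C.batch_size = 64          # stored via __setattr__ -> plain dict item
_C.learning_rate = 0.001

print(_C['batch_size'])     # 64   -- still a dict underneath
print(_C.learning_rate)     # 0.001 -- read back via __getattr__
_C['momentum'] = 0.9        # dict-style writes work as well
print(_C.momentum)          # 0.9
```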
@@ -17,13 +17,13 @@ from __future__ import division
 from __future__ import print_function
 import os
 import time
+import json
 import numpy as np
 import paddle
 import paddle.fluid as fluid
 import reader
-import models.yolov3 as models
+from models.yolov3 import YOLOv3
 from utility import print_arguments, parse_args
-import json
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval, Params
 from config import cfg
@@ -39,11 +39,9 @@ def eval():
     if not os.path.exists('output'):
         os.mkdir('output')
 
-    model = models.YOLOv3(cfg.model_cfg_path, is_train=False)
+    model = YOLOv3(cfg.model_cfg_path, is_train=False)
     model.build_model()
     outputs = model.get_pred()
-    yolo_anchors = model.get_yolo_anchors()
-    yolo_classes = model.get_yolo_classes()
     place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
     # yapf: disable
@@ -52,7 +50,7 @@ def eval():
         return os.path.exists(os.path.join(cfg.weights, var.name))
     fluid.io.load_vars(exe, cfg.weights, predicate=if_exist)
     # yapf: enable
-    input_size = model.get_input_size()
+    input_size = cfg.input_size
     test_reader = reader.test(input_size, 1)
     label_names, label_ids = reader.get_label_infos()
     if cfg.debug:
...
@@ -6,9 +6,7 @@ import paddle.fluid as fluid
 import box_utils
 import reader
 from utility import print_arguments, parse_args
-import models.yolov3 as models
-# from coco_reader import load_label_names
-import json
+from models.yolov3 import YOLOv3
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval, Params
 from config import cfg
@@ -19,12 +17,10 @@ def infer():
     if not os.path.exists('output'):
         os.mkdir('output')
 
-    model = models.YOLOv3(cfg.model_cfg_path, is_train=False)
+    model = YOLOv3(cfg.model_cfg_path, is_train=False)
     model.build_model()
     outputs = model.get_pred()
-    input_size = model.get_input_size()
-    yolo_anchors = model.get_yolo_anchors()
-    yolo_classes = model.get_yolo_classes()
+    input_size = cfg.input_size
     place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
     # yapf: disable
...
@@ -22,7 +22,7 @@ from paddle.fluid.layers import control_flow
 
 def exponential_with_warmup_decay(learning_rate, boundaries, values,
-                                  warmup_iter, warmup_factor, start_step):
+                                  warmup_iter, warmup_factor):
     global_step = lr_scheduler._decay_step_counter()
 
     lr = fluid.layers.create_global_var(
...
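The `start_step` argument is dropped from `exponential_with_warmup_decay` above. As a framework-free sketch of what such a schedule typically computes (an assumption about the intended behavior, not the fluid implementation): ramp linearly from `warmup_factor * lr` to `lr` over `warmup_iter` steps, then hold piecewise-constant values at the given boundaries.

```python
# Hypothetical pure-Python sketch of a warmup + piecewise-constant schedule,
# mirroring the signature above minus start_step. Not the fluid implementation.
def lr_at(step, learning_rate, boundaries, values, warmup_iter, warmup_factor):
    if step < warmup_iter:
        # linear warmup from warmup_factor * lr up to lr
        alpha = step / float(warmup_iter)
        factor = warmup_factor * (1 - alpha) + alpha
        return learning_rate * factor
    # piecewise-constant decay afterwards
    for boundary, value in zip(boundaries, values):
        if step < boundary:
            return value
    return values[-1]

# Example values as train.py builds them: lr * gamma**i per boundary crossed.
lr, gamma, boundaries = 0.001, 0.1, [400000, 450000]
values = [lr * (gamma ** i) for i in range(len(boundaries) + 1)]
print(lr_at(100, lr, boundaries, values, warmup_iter=4000, warmup_factor=0.))
print(lr_at(420000, lr, boundaries, values, warmup_iter=4000, warmup_factor=0.))
```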
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay
import box_utils
from config.config_parser import ConfigPaser
from config.config import cfg
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act=None,
bn=False,
name=None,
is_train=True):
if bn:
out = fluid.layers.conv2d(
input=input,
num_filters=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
act=None,
param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
name=name + "_weights"),
bias_attr=False,
name=name + '.conv2d.output.1')
bn_name = "bn" + name[4:]
out = fluid.layers.batch_norm(input=out,
act=None,
is_test=not is_train,
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0., 0.02),
regularizer=L2Decay(0.),
name=bn_name + '_scale'),
bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0),
regularizer=L2Decay(0.),
name=bn_name + '_offset'),
moving_mean_name=bn_name+'_mean',
moving_variance_name=bn_name+'_var',
name=bn_name+'.output')
else:
out = fluid.layers.conv2d(
input=input,
num_filters=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
act=None,
param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
name=name + "_weights"),
bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0),
regularizer=L2Decay(0.),
name=name + "_bias"),
name=name + '.conv2d.output.1')
if act == 'relu':
out = fluid.layers.relu(x=out)
if act == 'leaky':
out = fluid.layers.leaky_relu(x=out, alpha=0.1)
return out
class YOLOv3(object):
def __init__(self,
model_cfg_path,
is_train=True,
use_pyreader=True,
use_random=True):
self.model_cfg_path = model_cfg_path
self.config_parser = ConfigPaser(model_cfg_path)
self.is_train = is_train
self.use_pyreader = use_pyreader
self.use_random = use_random
self.outputs = []
self.losses = []
self.boxes = []
self.scores = []
self.downsample = 32
def build_model(self):
model_defs = self.config_parser.parse()
if model_defs is None:
return None
self.hyperparams = model_defs.pop(0)
assert self.hyperparams['type'].lower() == "net", \
"net config params should be given in the first segment named 'net'"
self.img_height = cfg.input_size
self.img_width = cfg.input_size
self.build_input()
out = self.image
layer_outputs = []
self.yolo_layer_defs = []
self.yolo_anchors = []
self.yolo_classes = []
self.outputs = []
for i, layer_def in enumerate(model_defs):
if layer_def['type'] == 'convolutional':
bn = layer_def.get('batch_normalize', 0)
ch_out = int(layer_def['filters'])
filter_size = int(layer_def['size'])
stride = int(layer_def['stride'])
padding = (filter_size - 1) // 2 if int(layer_def['pad']) else 0
act = layer_def['activation']
out = conv_bn_layer(
input=out,
ch_out=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
act=act,
bn=bool(bn),
name="conv"+str(i),
is_train=self.is_train)
elif layer_def['type'] == 'shortcut':
layer_from = int(layer_def['from'])
out = fluid.layers.elementwise_add(
x=out,
y=layer_outputs[layer_from],
name="res"+str(i))
elif layer_def['type'] == 'route':
layers = map(int, layer_def['layers'].split(","))
out = fluid.layers.concat(
input=[layer_outputs[i] for i in layers],
axis=1)
elif layer_def['type'] == 'upsample':
scale = int(layer_def['stride'])
# get dynamic upsample output shape
shape_nchw = fluid.layers.shape(out)
shape_hw = fluid.layers.slice(shape_nchw, axes=[0], \
starts=[2], ends=[4])
shape_hw.stop_gradient = True
in_shape = fluid.layers.cast(shape_hw, dtype='int32')
out_shape = in_shape * scale
out_shape.stop_gradient = True
# reisze by actual_shape
out = fluid.layers.resize_nearest(
input=out,
scale=scale,
actual_shape=out_shape,
name="upsample"+str(i))
elif layer_def['type'] == 'maxpool':
pool_size = int(layer_def['size'])
pool_stride = int(layer_def['stride'])
pool_padding = 0
if pool_stride == 1 and pool_size == 2:
pool_padding = 1
out = fluid.layers.pool2d(
input=out,
pool_type='max',
pool_size=pool_size,
pool_stride=pool_stride,
pool_padding=pool_padding)
elif layer_def['type'] == 'yolo':
self.yolo_layer_defs.append(layer_def)
self.outputs.append(out)
anchor_mask = map(int, layer_def['mask'].split(','))
anchors = map(int, layer_def['anchors'].split(','))
mask_anchors = []
for m in anchor_mask:
mask_anchors.append(anchors[2 * m])
mask_anchors.append(anchors[2 * m + 1])
self.yolo_anchors.append(mask_anchors)
class_num = int(layer_def['classes'])
self.yolo_classes.append(class_num)
if self.is_train:
ignore_thresh = float(layer_def['ignore_thresh'])
loss = fluid.layers.yolov3_loss(
x=out,
gtbox=self.gtbox,
gtlabel=self.gtlabel,
gtscore=self.gtscore,
anchors=anchors,
anchor_mask=anchor_mask,
class_num=class_num,
ignore_thresh=ignore_thresh,
downsample_ratio=self.downsample,
use_label_smooth=cfg.label_smooth,
name="yolo_loss"+str(i))
self.losses.append(fluid.layers.reduce_mean(loss))
else:
boxes, scores = fluid.layers.yolo_box(
x=out,
img_size=self.im_shape,
anchors=mask_anchors,
class_num=class_num,
conf_thresh=cfg.valid_thresh,
downsample_ratio=self.downsample,
name="yolo_box"+str(i))
self.boxes.append(boxes)
self.scores.append(fluid.layers.transpose(scores, perm=[0, 2, 1]))
self.downsample //= 2
layer_outputs.append(out)
def loss(self):
return sum(self.losses)
def get_pred(self):
yolo_boxes = fluid.layers.concat(self.boxes, axis=1)
yolo_scores = fluid.layers.concat(self.scores, axis=2)
return fluid.layers.multiclass_nms(
bboxes=yolo_boxes,
scores=yolo_scores,
score_threshold=cfg.valid_thresh,
nms_top_k=cfg.nms_topk,
keep_top_k=cfg.nms_posk,
nms_threshold=cfg.nms_thresh,
background_label=-1,
name="multiclass_nms")
def get_yolo_anchors(self):
return self.yolo_anchors
def get_yolo_classes(self):
return self.yolo_classes
def build_input(self):
self.image_shape = [3, self.img_height, self.img_width]
if self.use_pyreader and self.is_train:
self.py_reader = fluid.layers.py_reader(
capacity=64,
shapes = [[-1] + self.image_shape, [-1, cfg.max_box_num, 4], [-1, cfg.max_box_num], [-1, cfg.max_box_num]],
lod_levels=[0, 0, 0, 0],
dtypes=['float32'] * 2 + ['int32'] + ['float32'],
use_double_buffer=True)
self.image, self.gtbox, self.gtlabel, self.gtscore = fluid.layers.read_file(self.py_reader)
else:
self.image = fluid.layers.data(
name='image', shape=self.image_shape, dtype='float32'
)
self.gtbox = fluid.layers.data(
name='gtbox', shape=[cfg.max_box_num, 4], dtype='float32'
)
self.gtlabel = fluid.layers.data(
name='gtlabel', shape=[cfg.max_box_num], dtype='int32'
)
self.gtscore = fluid.layers.data(
name='gtscore', shape=[cfg.max_box_num], dtype='float32'
)
self.im_shape = fluid.layers.data(
name="im_shape", shape=[2], dtype='int32')
self.im_id = fluid.layers.data(
name="im_id", shape=[1], dtype='int32')
def feeds(self):
if not self.is_train:
return [self.image, self.im_id, self.im_shape]
return [self.image, self.gtbox, self.gtlabel, self.gtscore]
def get_hyperparams(self):
return self.hyperparams
def get_input_size(self):
return cfg.input_size
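For context on the cfg-driven `YOLOv3` variant above: `ConfigPaser.parse()` turns a darknet-style `.cfg` file into a list of dicts, and `build_model()` pops the leading `net` block and then dispatches on each dict's `type`. A small illustrative sketch of that data shape (the keys and values here are invented for the example, not a real YOLOv3 config):

```python
# Illustrative model_defs list, shaped like what ConfigPaser.parse() returns
# for a tiny .cfg file (values are examples only).
model_defs = [
    {'type': 'net', 'height': '608', 'width': '608'},          # hyperparams
    {'type': 'convolutional', 'batch_normalize': '1',
     'filters': '32', 'size': '3', 'stride': '1', 'pad': '1',
     'activation': 'leaky'},
    {'type': 'shortcut', 'from': '-3', 'activation': 'linear'},
]

hyperparams = model_defs.pop(0)          # build_model() pops the 'net' block first
for layer_def in model_defs:
    print(layer_def['type'])             # then dispatches on 'type', as above
```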
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); #Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. #you may not use this file except in compliance with the License.
#You may obtain a copy of the License at #You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software #Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, #distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and #See the License for the specific language governing permissions and
#limitations under the License. #limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant from paddle.fluid.initializer import Constant
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from config import cfg
def conv_bn_layer(input,
def conv_bn_layer(input, ch_out,
ch_out, filter_size,
filter_size, stride,
stride, padding,
padding, act='leaky',
act='leaky', is_test=True,
i=0): name=None):
conv1 = fluid.layers.conv2d( conv1 = fluid.layers.conv2d(
input=input, input=input,
num_filters=ch_out, num_filters=ch_out,
filter_size=filter_size, filter_size=filter_size,
stride=stride, stride=stride,
padding=padding, padding=padding,
act=None, act=None,
param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02), param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
name="conv" + str(i)+"_weights"), name=name+".conv.weights"),
bias_attr=False) bias_attr=False)
bn_name = "bn" + str(i) bn_name = name + ".bn"
out = fluid.layers.batch_norm(
out = fluid.layers.batch_norm( input=conv1,
input=conv1, act=None,
act=None, is_test=is_test,
is_test=True, param_attr=ParamAttr(
param_attr=ParamAttr( initializer=fluid.initializer.Normal(0., 0.02),
initializer=fluid.initializer.Normal(0., 0.02), regularizer=L2Decay(0.),
regularizer=L2Decay(0.), name=bn_name + '.scale'),
name=bn_name + '_scale'), bias_attr=ParamAttr(
bias_attr=ParamAttr( initializer=fluid.initializer.Constant(0.0),
initializer=fluid.initializer.Constant(0.0), regularizer=L2Decay(0.),
regularizer=L2Decay(0.), name=bn_name + '.offset'),
name=bn_name + '_offset'), moving_mean_name=bn_name + '.mean',
moving_mean_name=bn_name + '_mean', moving_variance_name=bn_name + '.var')
moving_variance_name=bn_name + '_var') if act == 'leaky':
if act == 'leaky': out = fluid.layers.leaky_relu(x=out, alpha=0.1)
out = fluid.layers.leaky_relu(x=out, alpha=0.1) return out
return out
def downsample(input, ch_out, filter_size=3, stride=2, padding=1, is_test=True, name=None):
def basicblock(input, ch_out, stride,i): return conv_bn_layer(input,
""" ch_out=ch_out,
channel: convolution channels for 1x1 conv filter_size=filter_size,
""" stride=stride,
conv1 = conv_bn_layer(input, ch_out, 1, 1, 0, i=i) padding=padding,
conv2 = conv_bn_layer(conv1, ch_out*2, 3, 1, 1, i=i+1) is_test=is_test,
out = fluid.layers.elementwise_add(x=input, y=conv2, act=None,name="res"+str(i+2)) name=name)
return out
def basicblock(input, ch_out, is_test=True, name=None):
def layer_warp(block_func, input, ch_out, count, stride,i): conv1 = conv_bn_layer(input, ch_out, 1, 1, 0, is_test=is_test, name=name+".0")
res_out = block_func(input, ch_out, stride, i=i) conv2 = conv_bn_layer(conv1, ch_out*2, 3, 1, 1, is_test=is_test, name=name+".1")
for j in range(1, count): out = fluid.layers.elementwise_add(x=input, y=conv2, act=None)
res_out = block_func(res_out, ch_out, 1 ,i=i+j*3) return out
return res_out
def layer_warp(block_func, input, ch_out, count, is_test=True, name=None):
DarkNet_cfg = { res_out = block_func(input, ch_out, is_test=is_test, name='{}.0'.format(name))
53: ([1,2,8,8,4],basicblock) for j in range(1, count):
} res_out = block_func(res_out, ch_out, is_test=is_test, name='{}.{}'.format(name, j))
return res_out
# num_filters = [32, 64, 128, 256, 512, 1024]
DarkNet_cfg = {
def add_DarkNet53_conv_body(body_input): 53: ([1,2,8,8,4],basicblock)
}
stages, block_func = DarkNet_cfg[53]
stages = stages[0:5] def add_DarkNet53_conv_body(body_input, is_test=True):
conv1 = conv_bn_layer( stages, block_func = DarkNet_cfg[53]
body_input, ch_out=32, filter_size=3, stride=1, padding=1, act="leaky",i=0) stages = stages[0:5]
conv2 = conv_bn_layer( conv1 = conv_bn_layer(
conv1, ch_out=64, filter_size=3, stride=2, padding=1, act="leaky", i=1) body_input, ch_out=32, filter_size=3, stride=1, padding=1, is_test=is_test, name="yolo_input")
block3 = layer_warp(block_func, conv2, 32, stages[0], 1, i=2) downsample_ = downsample(conv1, ch_out=conv1.shape[1]*2, is_test=is_test, name="yolo_input.downsample")
downsample3 = conv_bn_layer( index = 2
block3, ch_out=128, filter_size=3, stride=2, padding=1, i=5) blocks = []
block4 = layer_warp(block_func, downsample3, 64, stages[1], 1, i=6) for i, stage in enumerate(stages):
downsample4 = conv_bn_layer( block = layer_warp(block_func, downsample_, 32 *(2**i), stage, is_test=is_test, name="stage.{}".format(i))
block4, ch_out=256, filter_size=3, stride=2, padding=1, i=12) blocks.append(block)
block5 = layer_warp(block_func, downsample4, 128, stages[2], 1,i=13) index += 3 * stage
downsample5 = conv_bn_layer( if i < len(stages) - 1: # do not downsaple in the last stage
block5, ch_out=512, filter_size=3, stride=2, padding=1, i=37) downsample_ = downsample(block, ch_out=block.shape[1]*2, is_test=is_test, name="stage.{}.downsample".format(i))
block6 = layer_warp(block_func, downsample5, 256, stages[3], 1, i=38) index += 1
downsample6 = conv_bn_layer( return blocks[-1:-4:-1]
block6, ch_out=1024, filter_size=3, stride=2, padding=1, i=62)
block7 = layer_warp(block_func, downsample6, 512, stages[4], 1,i=63)
return block7,block6,block5
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); #Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. #you may not use this file except in compliance with the License.
#You may obtain a copy of the License at #You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software #Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, #distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and #See the License for the specific language governing permissions and
#limitations under the License. #limitations under the License.
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant from paddle.fluid.initializer import Constant
from paddle.fluid.initializer import Normal from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from config_parser import ConfigPaser from config import cfg
from config import cfg
from .darknet import add_DarkNet53_conv_body
from darknet import add_DarkNet53_conv_body from .darknet import conv_bn_layer
from darknet import conv_bn_layer
def yolo_detection_block(input, channel, is_test=True, name=None):
def yolo_detection_block(input, channel,i): assert channel % 2 == 0, "channel {} cannot be divided by 2".format(channel)
assert channel % 2 == 0, "channel {} cannot be divided by 2".format(channel) conv = input
conv1 = input for j in range(2):
for j in range(2): conv = conv_bn_layer(conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.{}.0'.format(name, j))
conv1 = conv_bn_layer(conv1, channel, filter_size=1, stride=1, padding=0,i=i+j*2) conv = conv_bn_layer(conv, channel*2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.{}.1'.format(name, j))
conv1 = conv_bn_layer(conv1, channel*2, filter_size=3, stride=1, padding=1,i=i+j*2+1) route = conv_bn_layer(conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.2'.format(name))
route = conv_bn_layer(conv1, channel, filter_size=1, stride=1, padding=0,i=i+4) tip = conv_bn_layer(route,channel*2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.tip'.format(name))
tip = conv_bn_layer(route,channel*2, filter_size=3, stride=1, padding=1,i=i+5) return route, tip
return route, tip
def upsample(input, scale=2,name=None):
def upsample(out, stride=2,name=None): # get dynamic upsample output shape
out = out shape_nchw = fluid.layers.shape(input)
scale = stride shape_hw = fluid.layers.slice(shape_nchw, axes=[0], starts=[2], ends=[4])
# get dynamic upsample output shape shape_hw.stop_gradient = True
shape_nchw = fluid.layers.shape(out) in_shape = fluid.layers.cast(shape_hw, dtype='int32')
shape_hw = fluid.layers.slice(shape_nchw, axes=[0], starts=[2], ends=[4]) out_shape = in_shape * scale
shape_hw.stop_gradient = True out_shape.stop_gradient = True
in_shape = fluid.layers.cast(shape_hw, dtype='int32')
out_shape = in_shape * scale # reisze by actual_shape
out_shape.stop_gradient = True out = fluid.layers.resize_nearest(
input=input,
# reisze by actual_shape scale=scale,
out = fluid.layers.resize_nearest( actual_shape=out_shape,
input=out, name=name)
scale=scale, return out
actual_shape=out_shape,
name=name) class YOLOv3(object):
return out def __init__(self,
model_cfg_path,
class YOLOv3(object): is_train=True,
def __init__(self, use_pyreader=True,
model_cfg_path, use_random=True):
is_train=True, self.model_cfg_path = model_cfg_path
use_pyreader=True, self.is_train = is_train
use_random=True): self.use_pyreader = use_pyreader
self.model_cfg_path = model_cfg_path self.use_random = use_random
self.config_parser = ConfigPaser(model_cfg_path) self.outputs = []
self.is_train = is_train self.losses = []
self.use_pyreader = use_pyreader self.downsample = 32
self.use_random = use_random self.ignore_thresh = .7
self.outputs = [] self.class_num = 80
self.losses = []
self.downsample = 32 def build_model(self):
self.ignore_thresh = .7
self.class_num = 80 self.img_height = cfg.input_size
self.img_width = cfg.input_size
def build_model(self):
self.build_input()
self.img_height = cfg.input_size
self.img_width = cfg.input_size self.outputs = []
self.boxes = []
self.build_input() self.scores = []
out = self.image blocks = add_DarkNet53_conv_body(self.image, not self.is_train)
for i, block in enumerate(blocks):
self.yolo_anchors = [] if i > 0:
self.yolo_classes = [] block = fluid.layers.concat(
self.outputs = [] input=[route, block],
self.boxes = [] axis=1)
self.scores = [] route, tip = yolo_detection_block(block, channel=512//(2**i),
is_test=(not self.is_train),
name="yolo_block.{}".format(i))
scale1,scale2,scale3 = add_DarkNet53_conv_body(out) block_out = fluid.layers.conv2d(
input=tip,
# 13*13 scale output num_filters=255,
route1, tip1 = yolo_detection_block(scale1, channel=512,i=75) filter_size=1,
# scale1 output stride=1,
scale1_out = fluid.layers.conv2d( padding=0,
input=tip1, act=None,
num_filters=255, param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
filter_size=1, name="yolo_output.{}.conv.weights".format(i)),
stride=1, bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0),
padding=0, regularizer=L2Decay(0.),
act=None, name="yolo_output.{}.conv.bias".format(i)))
param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02), self.outputs.append(block_out)
name="conv81_weights"),
bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0), if i < len(blocks) - 1:
regularizer=L2Decay(0.), route = conv_bn_layer(
name="conv81_bias")) input=route,
ch_out=256//(2**i),
self.outputs.append(scale1_out) filter_size=1,
stride=1,
route1 = conv_bn_layer( padding=0,
input=route1, is_test=(not self.is_train),
ch_out=256, name="yolo_transition.{}".format(i))
filter_size=1, # upsample
stride=1, route = upsample(route)
padding=0,
i=84)
# upsample anchor_mask = [6,7,8,3,4,5,0,1,2]
route1 = upsample(route1) anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
for i,out in enumerate(self.outputs):
# concat mask = anchor_mask[i*3 : (i+1)*3]
route1 = fluid.layers.concat( mask_anchors=[]
input=[route1,scale2],
axis=1) for m in mask:
mask_anchors.append(anchors[2 * m])
# 26*26 scale output mask_anchors.append(anchors[2 * m + 1])
route2, tip2 = yolo_detection_block(route1, channel=256,i=87) class_num = int(self.class_num)
# scale2 output if self.is_train:
scale2_out = fluid.layers.conv2d( ignore_thresh = float(self.ignore_thresh)
input=tip2, loss = fluid.layers.yolov3_loss(
num_filters=255, x=out,
filter_size=1, gtbox=self.gtbox,
stride=1, gtlabel=self.gtlabel,
padding=0, gtscore=self.gtscore,
act=None, anchors=anchors,
param_attr=ParamAttr(name="conv93_weights"), anchor_mask=mask,
bias_attr=ParamAttr(name="conv93_bias")) class_num=class_num,
ignore_thresh=ignore_thresh,
self.outputs.append(scale2_out) downsample_ratio=self.downsample,
use_label_smooth=cfg.label_smooth,
route2 = conv_bn_layer( name="yolo_loss"+str(i))
input=route2, self.losses.append(fluid.layers.reduce_mean(loss))
ch_out=128, else:
filter_size=1, boxes, scores = fluid.layers.yolo_box(
stride=1, x=out,
padding=0, img_size=self.im_shape,
i=96) anchors=mask_anchors,
# upsample class_num=class_num,
route2 = upsample(route2) conf_thresh=cfg.valid_thresh,
downsample_ratio=self.downsample,
# concat name="yolo_box"+str(i))
route2 = fluid.layers.concat( self.boxes.append(boxes)
input=[route2,scale3], self.scores.append(fluid.layers.transpose(scores, perm=[0, 2, 1]))
axis=1)
self.downsample //= 2
# 52*52 scale output
route3, tip3 = yolo_detection_block(route2, channel=128, i=99)
def loss(self):
# scale3 output return sum(self.losses)
scale3_out = fluid.layers.conv2d(
input=tip3, def get_pred(self):
num_filters=255, yolo_boxes = fluid.layers.concat(self.boxes, axis=1)
filter_size=1, yolo_scores = fluid.layers.concat(self.scores, axis=2)
stride=1, return fluid.layers.multiclass_nms(
padding=0, bboxes=yolo_boxes,
act=None, scores=yolo_scores,
param_attr=ParamAttr(name="conv105_weights"), score_threshold=cfg.valid_thresh,
bias_attr=ParamAttr(name="conv105_bias")) nms_top_k=cfg.nms_topk,
keep_top_k=cfg.nms_posk,
nms_threshold=cfg.nms_thresh,
self.outputs.append(scale3_out) background_label=-1,
# yolo name="multiclass_nms")
anchor_mask = [6,7,8,3,4,5,0,1,2] def build_input(self):
anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] self.image_shape = [3, self.img_height, self.img_width]
for i,out in enumerate(self.outputs): if self.use_pyreader and self.is_train:
mask = anchor_mask[i*3 : (i+1)*3] self.py_reader = fluid.layers.py_reader(
mask_anchors=[] capacity=64,
shapes = [[-1] + self.image_shape, [-1, cfg.max_box_num, 4], [-1, cfg.max_box_num], [-1, cfg.max_box_num]],
for m in mask: lod_levels=[0, 0, 0, 0],
mask_anchors.append(anchors[2 * m]) dtypes=['float32'] * 2 + ['int32'] + ['float32'],
mask_anchors.append(anchors[2 * m + 1]) use_double_buffer=True)
self.yolo_anchors.append(mask_anchors) self.image, self.gtbox, self.gtlabel, self.gtscore = fluid.layers.read_file(self.py_reader)
class_num = int(self.class_num) else:
self.yolo_classes.append(class_num) self.image = fluid.layers.data(
name='image', shape=self.image_shape, dtype='float32'
if self.is_train: )
ignore_thresh = float(self.ignore_thresh) self.gtbox = fluid.layers.data(
loss = fluid.layers.yolov3_loss( name='gtbox', shape=[cfg.max_box_num, 4], dtype='float32'
x=out, )
gtbox=self.gtbox, self.gtlabel = fluid.layers.data(
gtlabel=self.gtlabel, name='gtlabel', shape=[cfg.max_box_num], dtype='int32'
# gtscore=self.gtscore, )
anchors=anchors, self.gtscore = fluid.layers.data(
anchor_mask=mask, name='gtscore', shape=[cfg.max_box_num], dtype='float32'
class_num=class_num, )
ignore_thresh=ignore_thresh, self.im_shape = fluid.layers.data(
downsample_ratio=self.downsample, name="im_shape", shape=[2], dtype='int32')
# use_label_smooth=False, self.im_id = fluid.layers.data(
name="yolo_loss"+str(i)) name="im_id", shape=[1], dtype='int32')
self.losses.append(fluid.layers.reduce_mean(loss))
else: def feeds(self):
boxes, scores = fluid.layers.yolo_box( if not self.is_train:
x=out, return [self.image, self.im_id, self.im_shape]
img_size=self.im_shape, return [self.image, self.gtbox, self.gtlabel, self.gtscore]
anchors=mask_anchors,
class_num=class_num,
conf_thresh=cfg.valid_thresh,
downsample_ratio=self.downsample,
name="yolo_box"+str(i))
self.boxes.append(boxes)
self.scores.append(fluid.layers.transpose(scores, perm=[0, 2, 1]))
self.downsample //= 2
def loss(self):
return sum(self.losses)
def get_pred(self):
# return self.outputs
yolo_boxes = fluid.layers.concat(self.boxes, axis=1)
yolo_scores = fluid.layers.concat(self.scores, axis=2)
return fluid.layers.multiclass_nms(
bboxes=yolo_boxes,
scores=yolo_scores,
score_threshold=cfg.valid_thresh,
nms_top_k=cfg.nms_topk,
keep_top_k=cfg.nms_posk,
nms_threshold=cfg.nms_thresh,
background_label=-1,
name="multiclass_nms")
def get_yolo_anchors(self):
return self.yolo_anchors
def get_yolo_classes(self):
return self.yolo_classes
def build_input(self):
self.image_shape = [3, self.img_height, self.img_width]
if self.use_pyreader and self.is_train:
self.py_reader = fluid.layers.py_reader(
capacity=64,
shapes = [[-1] + self.image_shape, [-1, cfg.max_box_num, 4], [-1, cfg.max_box_num], [-1, cfg.max_box_num]],
lod_levels=[0, 0, 0, 0],
dtypes=['float32'] * 2 + ['int32'] + ['float32'],
use_double_buffer=True)
self.image, self.gtbox, self.gtlabel, self.gtscore = fluid.layers.read_file(self.py_reader)
else:
self.image = fluid.layers.data(
name='image', shape=self.image_shape, dtype='float32'
)
self.gtbox = fluid.layers.data(
name='gtbox', shape=[cfg.max_box_num, 4], dtype='float32'
)
self.gtlabel = fluid.layers.data(
name='gtlabel', shape=[cfg.max_box_num], dtype='int32'
)
self.gtscore = fluid.layers.data(
name='gtscore', shape=[cfg.max_box_num], dtype='float32'
)
self.im_shape = fluid.layers.data(
name="im_shape", shape=[2], dtype='int32')
self.im_id = fluid.layers.data(
name="im_id", shape=[1], dtype='int32')
def feeds(self):
if not self.is_train:
return [self.image, self.im_id, self.im_shape]
return [self.image, self.gtbox, self.gtlabel, self.gtscore]
def get_input_size(self):
return cfg.input_size
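Both versions of `models/yolov3.py` shown above expand a per-scale anchor mask into the flat anchor list before calling `yolov3_loss` / `yolo_box`. A standalone sketch of that slicing, using the same literals that appear in the diff:

```python
# Standalone sketch of the mask -> mask_anchors expansion used per YOLO scale.
anchor_mask = [6, 7, 8, 3, 4, 5, 0, 1, 2]
anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
           59, 119, 116, 90, 156, 198, 373, 326]

for i in range(3):                        # three detection scales
    mask = anchor_mask[i * 3:(i + 1) * 3]
    mask_anchors = []
    for m in mask:
        mask_anchors.append(anchors[2 * m])
        mask_anchors.append(anchors[2 * m + 1])
    print(i, mask, mask_anchors)
# scale 0 -> [116, 90, 156, 198, 373, 326], the largest anchors (13x13 grid)
```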
@@ -255,8 +255,8 @@ def train(size=416,
           random_sizes=[],
           interval=10,
           pyreader_num=1,
-          num_workers=16,
-          max_queue=32,
+          num_workers=2,
+          max_queue=4,
           use_multiprocessing=True):
     generator = dsr.get_reader('train', size, batch_size, shuffle, int(mixup_iter/pyreader_num), random_sizes)
...
@@ -26,7 +26,7 @@ from utility import parse_args, print_arguments, SmoothedValue
 import paddle
 import paddle.fluid as fluid
 import reader
-import models.yolov3 as models
+from models.yolov3 import YOLOv3
 from learning_rate import exponential_with_warmup_decay
 from config import cfg
 
@@ -42,27 +42,21 @@ def train():
     if not os.path.exists(cfg.model_save_dir):
         os.makedirs(cfg.model_save_dir)
 
-    model = models.YOLOv3(cfg.model_cfg_path, use_pyreader=cfg.use_pyreader)
+    model = YOLOv3(cfg.model_cfg_path, use_pyreader=cfg.use_pyreader)
     model.build_model()
-    input_size = model.get_input_size()
+    input_size = cfg.input_size
     loss = model.loss()
     loss.persistable = True
-    print("cfg.learning",cfg.learning_rate)
-    print("cfg.decay",cfg.decay)
 
     devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
     print("Found {} CUDA devices.".format(devices_num))
 
-    learning_rate = float(cfg.learning_rate)
+    learning_rate = cfg.learning_rate
     boundaries = cfg.lr_steps
     gamma = cfg.lr_gamma
     step_num = len(cfg.lr_steps)
-    if isinstance(gamma, list):
-        values = [learning_rate * g for g in gamma]
-    else:
-        values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
+    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
 
     optimizer = fluid.optimizer.Momentum(
         learning_rate=exponential_with_warmup_decay(
@@ -70,10 +64,9 @@ def train():
             boundaries=boundaries,
             values=values,
             warmup_iter=cfg.warm_up_iter,
-            warmup_factor=cfg.warm_up_factor,
-            start_step=cfg.start_iter),
-        regularization=fluid.regularizer.L2Decay(float(cfg.decay)),
-        momentum=float(cfg.momentum))
+            warmup_factor=cfg.warm_up_factor),
+        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
+        momentum=cfg.momentum)
     optimizer.minimize(loss)
 
     fluid.memory_optimize(fluid.default_main_program())
 
@@ -98,11 +91,11 @@ def train():
     mixup_iter = cfg.max_iter - cfg.start_iter - cfg.no_mixup_iter
     if cfg.use_pyreader:
-        train_reader = reader.train(input_size, batch_size=int(cfg.batch)/devices_num, shuffle=True, mixup_iter=mixup_iter*devices_num, random_sizes=random_sizes, interval=10, pyreader_num=devices_num, use_multiprocessing=cfg.use_multiprocess)
+        train_reader = reader.train(input_size, batch_size=cfg.batch_size/devices_num, shuffle=True, mixup_iter=mixup_iter*devices_num, random_sizes=random_sizes, interval=10, pyreader_num=devices_num, use_multiprocessing=cfg.use_multiprocess)
         py_reader = model.py_reader
         py_reader.decorate_paddle_reader(train_reader)
     else:
-        train_reader = reader.train(input_size, batch_size=int(cfg.batch), shuffle=True, mixup_iter=mixup_iter, random_sizes=random_sizes, use_multiprocessing=cfg.use_multiprocess)
+        train_reader = reader.train(input_size, batch_size=cfg.batch_size, shuffle=True, mixup_iter=mixup_iter, random_sizes=random_sizes, use_multiprocessing=cfg.use_multiprocess)
         feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
 
     def save_model(postfix):
...
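The reader call above splits the global `cfg.batch_size` across the visible devices on the `py_reader` path. A rough, self-contained sketch of that arithmetic (the integer `//` is my choice for clarity; the diff itself uses `/`):

```python
# Sketch of the per-device batch computation behind the reader.train(...) call.
import os

batch_size = 64                                # cfg.batch_size
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))          # unset env -> 1 "device"
per_device_batch = batch_size // devices_num   # assumed integer split per device
mixup_iter = 500200 - 0 - 40000                # max_iter - start_iter - no_mixup_iter
print(devices_num, per_device_batch, mixup_iter * devices_num)
```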
@@ -108,14 +108,15 @@ def parse_args():
     add_arg('start_iter', int, 0, "Start iteration.")
     add_arg('use_multiprocess', bool, True, "add multiprocess.")
     #SOLVER
+    add_arg('batch_size', int, 64, "Learning rate.")
     add_arg('learning_rate', float, 0.001, "Learning rate.")
     add_arg('max_iter', int, 500200, "Iter number.")
     add_arg('snapshot_iter', int, 2000, "Save model every snapshot stride.")
+    add_arg('label_smooth', bool, True, "Use label smooth in class label.")
+    add_arg('no_mixup_iter', int, 40000, "Disable mixup in last N iter.")
     # TRAIN TEST INFER
     add_arg('input_size', int, 608, "Image input size of YOLOv3.")
     add_arg('random_shape', bool, True, "Resize to random shape for train reader.")
-    add_arg('label_smooth', bool, True, "Use label smooth in class label.")
-    add_arg('no_mixup_iter', int, 40000, "Disable mixup in last N iter.")
     add_arg('valid_thresh', float, 0.005, "Valid confidence score for NMS.")
     add_arg('nms_thresh', float, 0.45, "NMS threshold.")
     add_arg('nms_topk', int, 400, "The number of boxes to perform NMS.")
...
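`add_arg` itself is defined in `utility.py` and is not part of this diff; the stand-in below is an assumption about its shape (a thin `argparse` wrapper), included only so the flags added above can be exercised in isolation.

```python
# Hypothetical stand-in for utility.add_arg -- a thin argparse wrapper.
# The real helper lives in utility.py and is not shown in this commit.
import argparse
import distutils.util

parser = argparse.ArgumentParser()

def add_arg(name, type, default, help):
    # bool flags need a string->bool parser so "--label_smooth False" works
    arg_type = distutils.util.strtobool if type == bool else type
    parser.add_argument("--" + name, type=arg_type, default=default, help=help)

add_arg('batch_size',    int,   64,    "Mini-batch size.")
add_arg('learning_rate', float, 0.001, "Learning rate.")
add_arg('label_smooth',  bool,  True,  "Use label smooth in class label.")

print(parser.parse_args([]).batch_size)   # 64
```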