提交 98f4e41c 编写于 作者: F FDInSky

test=develop clean code

上级 4a6db5ef
#!/bin/bash
# Continuous-evaluation driver: runs a short training job first on one card,
# then on four cards, and pipes each training log into _ce.py.
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1

# Single-card run. ${face_detection:=0} falls back to (and assigns) card 0
# when the variable is unset or empty.
cudaid=${face_detection:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py \
    --model_save_dir=output/ \
    --data_dir=dataset/coco/ \
    --max_iter=500 \
    --enable_ce \
    --pretrained_model=./imagenet_resnet50_fusebn \
    --learning_rate=0.00125 \
    | python _ce.py

# Four-card run with a proportionally larger learning rate.
cudaid=${face_detection_m:=0,1,2,3} # use 0,1,2,3 card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py \
    --model_save_dir=output/ \
    --data_dir=dataset/coco/ \
    --max_iter=500 \
    --enable_ce \
    --pretrained_model=./imagenet_resnet50_fusebn \
    --learning_rate=0.005 \
    | python _ce.py
......@@ -29,7 +29,7 @@ RCNN系列目前包含两个代表模型:Faster RCNN,Mask RCNN
[Mask RCNN](https://arxiv.org/abs/1703.06870) 扩展自Faster RCNN,是经典的实例分割模型。
Mask RCNN同样为两阶段框架,第一阶段扫描图像生成候选框;第二阶段根据候选框得到分类结果,边界框,同时在原有Faster RCNN模型基础上添加分割分支,得到掩码结果,实现了掩码和类别预测关系的解耦。
Mask RCNN同样为两阶段框架,第一阶段扫描图像生成候选框;第二阶段根据候选框得到分类结果,边界框,同时在原有Faster RCNN模型基础上添加分割分支,得到掩码结果,实现了掩码和类别预测关系的解耦。
## 数据准备
......@@ -62,7 +62,7 @@ data/coco/
## 模型训练
**下载预训练模型:** 本示例提供Resnet-50预训练模型,该模型转换自Caffe,并对批标准化层(Batch Normalization Layer)进行参数融合。采用如下命令下载预训练模型:
**下载预训练模型:** 本示例提供Resnet-50预训练模型,该模型转换自Caffe,并对批标准化层(Batch Normalization Layer)进行参数融合。采用如下命令下载预训练模型:
sh ./pretrained/download.sh
......
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
# KPI trackers for the continuous-evaluation run: a per-pass duration and a
# training loss, once for the 1-card job and once for the 4-card job.
# NOTE(review): the positional arguments (0.08, 0) are defined by the external
# `kpi` module -- confirm their meaning there before changing them.
each_pass_duration_card1_kpi = DurationKpi(
    'each_pass_duration_card1', 0.08, 0, actived=True)
train_loss_card1_kpi = CostKpi('train_loss_card1', 0.08, 0)
each_pass_duration_card4_kpi = DurationKpi(
    'each_pass_duration_card4', 0.08, 0, actived=True)
train_loss_card4_kpi = CostKpi('train_loss_card4', 0.08, 0)

# Every KPI that log_to_ce() is allowed to record, looked up by its .name.
tracking_kpis = [
    each_pass_duration_card1_kpi,
    train_loss_card1_kpi,
    each_pass_duration_card4_kpi,
    train_loss_card4_kpi,
]
def parse_log(log):
    '''
    Yield (kpi_name, kpi_value) pairs found in a training log.

    Each line is stripped, split on tab characters and echoed to stdout.
    Only lines made of exactly three fields whose first field is the
    literal 'kpis' produce a record:

        kpis<TAB>train_loss_card1<TAB>1.0

    The third field is converted with float(); every other line is skipped.
    '''
    for raw_line in log.split('\n'):
        fields = raw_line.strip().split('\t')
        print(fields)
        if len(fields) != 3 or fields[0] != 'kpis':
            continue
        _, kpi_name, raw_value = fields
        yield kpi_name, float(raw_value)
def log_to_ce(log):
    """Record every KPI found in ``log`` on its matching tracker.

    Trackers are looked up by name among ``tracking_kpis``; a KPI name that
    is not tracked raises ``KeyError``. Each record is echoed to stdout,
    added to the tracker and persisted immediately.
    """
    trackers_by_name = {kpi.name: kpi for kpi in tracking_kpis}
    for name, value in parse_log(log):
        print(name, value)
        tracker = trackers_by_name[name]
        tracker.add_record(value)
        tracker.persist()
if __name__ == '__main__':
    # Read the whole training log from stdin and hand it to the KPI recorder.
    log_to_ce(sys.stdin.read())
......@@ -99,77 +99,73 @@ def eval():
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
def eval_loop():
eval_start = time.time()
dts_res = []
segms_res = []
for iter_id, data in enumerate(test_reader()):
start = time.time()
image_data = np.array([x[0] for x in data]).astype('float32')
image_info_data = np.array([x[1] for x in data]).astype('float32')
image_id_data = np.array([x[2] for x in data]).astype('int32')
if cfg.enable_ce:
print("image_data: ", np.abs(image_data).mean(),
image_data.shape)
print("im_info_dta: ", np.abs(image_info_data).mean(),
image_info_data.shape, image_info_data)
print("img_id: ", image_id_data, image_id_data.shape)
# forward
outputs = model(image_data, image_info_data, image_id_data)
pred_boxes_v = outputs[1].numpy()
if cfg.MASK_ON:
masks_v = outputs[2].numpy()
new_lod = list(outputs[0].numpy())
#new_lod = [[0, pred_boxes_v.shape[0]]] #pred_boxes_v.lod()
nmsed_out = pred_boxes_v
dts_res += get_dt_res(total_batch_size, new_lod, nmsed_out, data,
num_id_to_cat_id_map)
if cfg.MASK_ON and np.array(masks_v).shape != (1, 1):
segms_out = segm_results(nmsed_out, masks_v, image_info_data)
segms_res += get_segms_res(total_batch_size, new_lod, segms_out,
data, num_id_to_cat_id_map)
end = time.time()
print('batch id: {}, time: {}'.format(iter_id, end - start))
eval_end = time.time()
total_time = eval_end - eval_start
print('average time of eval is: {}'.format(total_time / (iter_id + 1)))
assert len(dts_res) > 0, "The number of valid bbox detected is zero.\n \
Please use reasonable model and check input data."
eval_start = time.time()
dts_res = []
segms_res = []
for iter_id, data in enumerate(test_reader()):
start = time.time()
image_data = np.array([x[0] for x in data]).astype('float32')
image_info_data = np.array([x[1] for x in data]).astype('float32')
image_id_data = np.array([x[2] for x in data]).astype('int32')
if cfg.enable_ce:
print("image_data: ", np.abs(image_data).mean(), image_data.shape)
print("im_info_dta: ", np.abs(image_info_data).mean(),
image_info_data.shape, image_info_data)
print("img_id: ", image_id_data, image_id_data.shape)
# forward
outputs = model(image_data, image_info_data, image_id_data)
pred_boxes_v = outputs[1].numpy()
if cfg.MASK_ON:
assert len(
segms_res) > 0, "The number of valid mask detected is zero.\n \
Please use reasonable model and check input data."
masks_v = outputs[2].numpy()
new_lod = list(outputs[0].numpy())
#new_lod = [[0, pred_boxes_v.shape[0]]] #pred_boxes_v.lod()
nmsed_out = pred_boxes_v
dts_res += get_dt_res(total_batch_size, new_lod, nmsed_out, data,
num_id_to_cat_id_map)
if cfg.MASK_ON and np.array(masks_v).shape != (1, 1):
segms_out = segm_results(nmsed_out, masks_v, image_info_data)
segms_res += get_segms_res(total_batch_size, new_lod, segms_out,
data, num_id_to_cat_id_map)
end = time.time()
print('batch id: {}, time: {}'.format(iter_id, end - start))
eval_end = time.time()
total_time = eval_end - eval_start
print('average time of eval is: {}'.format(total_time / (iter_id + 1)))
assert len(dts_res) > 0, "The number of valid bbox detected is zero.\n \
Please use reasonable model and check input data."
if cfg.MASK_ON:
assert len(
segms_res) > 0, "The number of valid mask detected is zero.\n \
Please use reasonable model and check input data."
with io.open("detection_bbox_result.json", 'w') as outfile:
with io.open("detection_bbox_result.json", 'w') as outfile:
encode_func = unicode if six.PY2 else str
outfile.write(encode_func(json.dumps(dts_res)))
print("start evaluate bbox using coco api")
cocoDt = cocoGt.loadRes("detection_bbox_result.json")
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
if cfg.MASK_ON:
with io.open("detection_segms_result.json", 'w') as outfile:
encode_func = unicode if six.PY2 else str
outfile.write(encode_func(json.dumps(dts_res)))
print("start evaluate bbox using coco api")
cocoDt = cocoGt.loadRes("detection_bbox_result.json")
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
outfile.write(encode_func(json.dumps(segms_res)))
print("start evaluate mask using coco api")
cocoDt = cocoGt.loadRes("detection_segms_result.json")
cocoEval = COCOeval(cocoGt, cocoDt, 'segm')
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
if cfg.MASK_ON:
with io.open("detection_segms_result.json", 'w') as outfile:
encode_func = unicode if six.PY2 else str
outfile.write(encode_func(json.dumps(segms_res)))
print("start evaluate mask using coco api")
cocoDt = cocoGt.loadRes("detection_segms_result.json")
cocoEval = COCOeval(cocoGt, cocoDt, 'segm')
cocoEval.evaluate()
cocoEval.accumulate()
eval_loop()
if __name__ == '__main__':
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
......
import os
import time
import numpy as np
from eval_helper import *
import paddle
import paddle.fluid as fluid
import reader
from utility import print_arguments, parse_args, check_gpu
import models.model_builder as model_builder
import models.resnet as resnet
from config import cfg
from data_utils import DatasetPath
def infer():
    """Run detection on the image at ``cfg.image_path`` and draw the result.

    The class-index -> category-name mapping is built from the COCO
    validation annotations when pycocotools and the dataset are usable;
    otherwise the built-in COCO17 mapping is used (``cfg.dataset`` must then
    be ``'coco2017'``). Weights are loaded from ``cfg.pretrained_model``, one
    batch is taken from the inference reader, and the predicted boxes (plus
    masks when ``cfg.MASK_ON``) are drawn on the image.

    Raises:
        ValueError: if ``cfg.pretrained_model`` does not exist.
    """
    try:
        from pycocotools.coco import COCO
        # Imported only so that a broken pycocotools install takes the
        # fallback path below, as it did before.
        from pycocotools.cocoeval import COCOeval, Params
        data_path = DatasetPath('val')
        test_list = data_path.get_file_list()
        coco_api = COCO(test_list)
        category_ids = coco_api.getCatIds()
        # Index 0 is reserved for background, so categories start at 1.
        cat_id_to_num_id_map = {
            cat_id: idx + 1
            for idx, cat_id in enumerate(category_ids)
        }
        labels_map = {
            cat_id_to_num_id_map[item['id']]: item['name']
            for item in coco_api.loadCats(category_ids)
        }
        labels_map[0] = 'background'
    except Exception:
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit;
        # only genuine failures should trigger the fallback mapping.
        print("The COCO dataset or COCO API is not exist, use the default "
              "mapping of class index and real category name on COCO17.")
        assert cfg.dataset == 'coco2017'
        labels_map = coco17_labels()

    image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size]

    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=False,
        mode='infer')
    model.build_model(image_shape)
    pred_boxes = model.eval_bbox_out()
    if cfg.MASK_ON:
        masks = model.eval_mask_out()
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # yapf: disable
    if not os.path.exists(cfg.pretrained_model):
        raise ValueError("Model path [%s] does not exist." % (cfg.pretrained_model))

    def if_exist(var):
        # Load only the variables that have a file in the model directory.
        return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
    fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
    # yapf: enable

    infer_reader = reader.infer(cfg.image_path)
    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    if cfg.MASK_ON:
        fetch_list = [pred_boxes, masks]
    else:
        fetch_list = [pred_boxes]

    # Only the first batch produced by the reader is processed.
    data = next(infer_reader())
    im_info = [data[0][1]]
    result = exe.run(fetch_list=[v.name for v in fetch_list],
                     feed=feeder.feed(data),
                     return_numpy=False)
    pred_boxes_v = result[0]
    if cfg.MASK_ON:
        masks_v = result[1]
    nmsed_out = pred_boxes_v

    image = None
    if cfg.MASK_ON:
        segms_out = segm_results(nmsed_out, masks_v, im_info)
        image = draw_mask_on_image(cfg.image_path, segms_out,
                                   cfg.draw_threshold)

    draw_bounding_box_on_image(cfg.image_path, nmsed_out, cfg.draw_threshold,
                               labels_map, image)
if __name__ == '__main__':
    # Parse and echo the command-line configuration, validate the GPU
    # setting, then run inference.
    cli_args = parse_args()
    print_arguments(cli_args)
    check_gpu(cli_args.use_gpu)
    infer()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler
from paddle.fluid.layers import control_flow
def exponential_with_warmup_decay(learning_rate, boundaries, values,
                                  warmup_iter, warmup_factor):
    """Build a piecewise learning-rate variable with a linear warm-up phase.

    While the global step is below ``warmup_iter`` the rate is
    ``learning_rate * (warmup_factor * (1 - alpha) + alpha)`` with
    ``alpha = step / warmup_iter``. After that, the first ``boundaries[i]``
    that the step is still below selects ``values[i]``; once every boundary
    has been passed the last entry of ``values`` is used.

    Returns the persistable global variable named "learning_rate" that the
    switch writes into.
    """

    def _scalar(value, **extra):
        # One-element float32 constant; `extra` forwards force_cpu if given.
        return fluid.layers.fill_constant(
            shape=[1], dtype='float32', value=float(value), **extra)

    global_step = lr_scheduler._decay_step_counter()

    lr = fluid.layers.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_iter_var = _scalar(warmup_iter, force_cpu=True)

    with control_flow.Switch() as switch:
        # Warm-up: interpolate from warmup_factor * lr up to lr.
        with switch.case(global_step < warmup_iter_var):
            alpha = global_step / warmup_iter_var
            factor = warmup_factor * (1 - alpha) + alpha
            decayed_lr = learning_rate * factor
            fluid.layers.assign(decayed_lr, lr)

        # One case per boundary; values[idx] applies until that boundary.
        for idx, boundary in enumerate(boundaries):
            boundary_val = _scalar(boundary, force_cpu=True)
            value_var = _scalar(values[idx])
            with switch.case(global_step < boundary_val):
                fluid.layers.assign(value_var, lr)

        # Past the final boundary: use the last scheduled value.
        last_value_var = _scalar(values[-1])
        with switch.default():
            fluid.layers.assign(last_value_var, lr)

    return lr
cimport cython
import numpy as np
cimport numpy as np
from .bbox import *
@cython.boundscheck(False)
@cython.wraparound(False)
def rpn_target_assign(
        anchor_box,
        gt_boxes,
        is_crowd,
        im_info,
        rpn_straddle_thresh,
        rpn_batch_size_per_im,
        rpn_positive_overlap,
        rpn_negative_overlap,
        rpn_fg_fraction,
        use_random=False):
    """Sample RPN training targets for every image of a batch.

    For each image the anchors are optionally restricted to those lying
    inside the image (within ``rpn_straddle_thresh``), the non-crowd ground
    truth boxes are scaled by the image scale, and ``_sample_anchor`` picks
    foreground/background anchors from the anchor-vs-gt overlaps. The
    per-image results are concatenated, with anchor indices of image ``i``
    shifted by ``i * anchor_num``.

    ``im_info[i]`` is read as (height, width, scale). ``anchor_box`` is
    indexed as a 2-D array whose first four columns are compared against the
    image bounds -- presumably [x1, y1, x2, y2]; confirm against the caller.

    Returns:
        (loc_indexes, score_indexes, tgt_labels, tgt_bboxes,
        bbox_inside_weights)
    """
    anchor_num = anchor_box.shape[0]
    batch_size = gt_boxes.shape[0]
    for i in range(batch_size):
        im_height = im_info[i][0]
        im_width = im_info[i][1]
        im_scale = im_info[i][2]
        if rpn_straddle_thresh >= 0:
            # Only keep anchors inside the image by a margin of straddle_thresh
            inds_inside = np.where(
                (anchor_box[:, 0] >= -rpn_straddle_thresh) &
                (anchor_box[:, 1] >= -rpn_straddle_thresh) & (
                    anchor_box[:, 2] < im_width + rpn_straddle_thresh) & (
                        anchor_box[:, 3] < im_height + rpn_straddle_thresh))[0]
            # keep only inside anchors
            inside_anchors = anchor_box[inds_inside, :]
        else:
            # A negative threshold disables the filter: use every anchor.
            inds_inside = np.arange(anchor_box.shape[0])
            inside_anchors = anchor_box
        gt_boxes_slice = gt_boxes[i] * im_scale
        is_crowd_slice = is_crowd[i]
        # Crowd boxes are excluded before the overlaps are computed.
        not_crowd_inds = np.where(is_crowd_slice == 0)[0]
        gt_boxes_slice = gt_boxes_slice[not_crowd_inds]
        iou = bbox_overlaps(inside_anchors, gt_boxes_slice)
        loc_inds, score_inds, labels, gt_inds, bbox_inside_weight = \
            _sample_anchor(iou, rpn_batch_size_per_im,
                           rpn_positive_overlap,
                           rpn_negative_overlap,
                           rpn_fg_fraction,
                           use_random)
        # unmap to all anchor
        loc_inds = inds_inside[loc_inds]
        score_inds = inds_inside[score_inds]
        sampled_anchor = anchor_box[loc_inds]
        sampled_gt = gt_boxes_slice[gt_inds]
        box_deltas = box_to_delta(
            sampled_anchor, sampled_gt,
            [1., 1., 1., 1.]
        )
        if i == 0:
            # The first image initialises the accumulators.
            loc_indexes = loc_inds
            score_indexes = score_inds
            tgt_labels = labels
            tgt_bboxes = box_deltas
            bbox_inside_weights = bbox_inside_weight
        else:
            # Later images are appended; their anchor indices are offset so
            # they address the flattened (batch * anchor_num) anchor list.
            loc_indexes = np.concatenate(
                [loc_indexes, loc_inds + i * anchor_num])
            score_indexes = np.concatenate(
                [score_indexes, score_inds + i * anchor_num])
            tgt_labels = np.concatenate([tgt_labels, labels])
            tgt_bboxes = np.vstack([tgt_bboxes, box_deltas])
            bbox_inside_weights = np.vstack([bbox_inside_weights, \
                                             bbox_inside_weight])
    # NOTE(review): with an empty batch the loop never runs and the names
    # returned below are unbound.
    return loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights
#@jit
def _sample_anchor(
        anchor_by_gt_overlap,
        rpn_batch_size_per_im,
        rpn_positive_overlap,
        rpn_negative_overlap,
        rpn_fg_fraction,
        use_random=False):
    """Pick foreground and background anchors from an overlap matrix.

    ``anchor_by_gt_overlap`` is indexed as (anchor, gt). Anchors are labelled
    1 (foreground), 0 (background) or -1 (ignored).

    Returns:
        (loc_index, score_index, labels, gt_inds, bbox_inside_weight) where
        ``loc_index`` lists the anchors used for box regression,
        ``score_index`` the foreground then background anchors used for
        classification, ``labels`` their 1/0 labels, ``gt_inds`` the best gt
        of every ``loc_index`` anchor, and ``bbox_inside_weight`` a
        (len(loc_index), 4) float32 array.
    """
    # Best gt for every anchor, and best anchor for every gt.
    anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
    anchor_to_gt_max = anchor_by_gt_overlap[np.arange(
        anchor_by_gt_overlap.shape[0]), anchor_to_gt_argmax]
    gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
    gt_to_anchor_max = anchor_by_gt_overlap[gt_to_anchor_argmax, np.arange(
        anchor_by_gt_overlap.shape[1])]
    # Every anchor that ties for the best overlap with some gt.
    anchors_with_max_overlap = np.where(
        anchor_by_gt_overlap == gt_to_anchor_max)[0]
    # Start with everything ignored, then mark foreground anchors.
    labels = np.ones((anchor_by_gt_overlap.shape[0], ), dtype=np.int32) * -1
    labels[anchors_with_max_overlap] = 1
    labels[anchor_to_gt_max >= rpn_positive_overlap] = 1
    # Cap the number of foreground anchors; the surplus goes back to ignored.
    num_fg = int(rpn_fg_fraction * rpn_batch_size_per_im)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg and use_random:
        disable_inds = np.random.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    else:
        disable_inds = fg_inds[num_fg:]
    labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]
    # Background candidates fill the rest of the per-image batch.
    num_bg = rpn_batch_size_per_im - np.sum(labels == 1)
    bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0]
    if len(bg_inds) > num_bg and use_random:
        # np.random.randint draws independently, so indices may repeat.
        enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)]
    else:
        enable_inds = bg_inds[:num_bg]
    # An anchor can be both foreground and a chosen background. Each such
    # anchor is turned into background below and replaced in the regression
    # list by a "fake" entry pointing at the first foreground anchor.
    fg_fake_inds = np.array([], np.int32)
    # NOTE(review): fg_inds[0] raises IndexError when no anchor is foreground.
    fg_value = np.array([fg_inds[0]], np.int32)
    fake_num = 0
    for bg_id in enable_inds:
        if bg_id in fg_inds:
            fake_num += 1
            fg_fake_inds = np.hstack([fg_fake_inds, fg_value])
    labels[enable_inds] = 0
    fg_inds = np.where(labels == 1)[0]
    bg_inds = np.where(labels == 0)[0]
    loc_index = np.hstack([fg_fake_inds, fg_inds])
    score_index = np.hstack([fg_inds, bg_inds])
    labels = labels[score_index]
    assert not np.any(labels == -1), "Wrong labels with -1"
    gt_inds = anchor_to_gt_argmax[loc_index]
    # Fake entries come first in loc_index and keep a zero weight.
    bbox_inside_weight = np.zeros((len(loc_index), 4), dtype=np.float32)
    bbox_inside_weight[fake_num:, :] = 1
    return loc_index, score_index, labels, gt_inds, bbox_inside_weight
#@jit
def generate_proposal_labels(
        rpn_rois, rpn_rois_lod, gt_classes, is_crowd, gt_boxes, im_info, batch_size_per_im,
        fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
        class_nums, use_random=False, is_cls_agnostic=False, is_cascade_rcnn=False):
    """Sample RoIs and build their training targets for a batch of images.

    ``rpn_rois`` holds the proposals of all images stacked together.
    ``rpn_rois_lod`` supplies, per image, the end offset of that image's
    slice; each slice starts where the previous one ended (the first at 0).
    Every slice is passed to ``_sample_rois`` with the matching per-image
    ground truth, and the per-image results are concatenated.

    Returns:
        (rois, labels, bbox_targets, bbox_inside_weights,
        bbox_outside_weights, lod): float32 arrays, except ``labels``
        (int32, reshaped to a column) and ``lod`` (int32, number of sampled
        RoIs per image).
    """
    blob_keys = ('rois', 'labels_int32', 'bbox_targets',
                 'bbox_inside_weights', 'bbox_outside_weights')
    collected = {key: [] for key in blob_keys}
    lod = []

    st_num = 0
    for im_i, rpn_rois_num in enumerate(rpn_rois_lod):
        frcn_blobs = _sample_rois(
            rpn_rois[st_num:rpn_rois_num],
            gt_classes[im_i], is_crowd[im_i], gt_boxes[im_i], im_info[im_i],
            batch_size_per_im, fg_fraction, fg_thresh,
            bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
            class_nums, use_random, is_cls_agnostic, is_cascade_rcnn)
        # The next image's proposals start where this image's ended.
        st_num = rpn_rois_num
        for key in blob_keys:
            collected[key].append(frcn_blobs[key])
        lod.append(frcn_blobs['rois'].shape[0])

    o_rois = np.concatenate(collected['rois'], axis=0).astype(np.float32)
    o_labels = np.concatenate(
        collected['labels_int32'], axis=0).astype(np.int32).reshape(-1, 1)
    o_bbox_targets = np.concatenate(
        collected['bbox_targets'], axis=0).astype(np.float32)
    o_bbox_inside_weights = np.concatenate(
        collected['bbox_inside_weights'], axis=0).astype(np.float32)
    o_bbox_outside_weights = np.concatenate(
        collected['bbox_outside_weights'], axis=0).astype(np.float32)
    o_lod = np.asarray(lod, np.int32)
    return o_rois, o_labels, o_bbox_targets, o_bbox_inside_weights, o_bbox_outside_weights, o_lod
#@jit
def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
                 batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
                 bg_thresh_lo, bbox_reg_weights, class_nums, use_random, is_cls_agnostic,
                 is_cascade_rcnn):
    """Sample foreground/background RoIs for one image and build targets.

    The proposals are rescaled by ``1 / im_info[2]``, stacked below the
    ground-truth boxes, matched to ground truth by overlap, split into
    foreground (max overlap >= ``fg_thresh``) and background
    (``bg_thresh_lo`` <= max overlap < ``bg_thresh_hi``), and the regression
    targets of the kept boxes are computed and expanded.

    Returns:
        dict with keys ``rois`` (kept boxes multiplied by the image scale),
        ``labels_int32``, ``bbox_targets``, ``bbox_inside_weights`` and
        ``bbox_outside_weights``.
    """
    rois_per_image = int(batch_size_per_im)
    fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
    # Roidb
    im_scale = im_info[2]
    inv_im_scale = 1. / im_scale
    rpn_rois = rpn_rois * inv_im_scale
    if is_cascade_rcnn:
        # Drops the leading len(gt_boxes) rows -- presumably ground-truth
        # boxes already prepended by an earlier stage; TODO confirm.
        rpn_rois = rpn_rois[gt_boxes.shape[0]:, :]
    # Ground-truth boxes come first, so they are candidates as well.
    boxes = np.vstack([gt_boxes, rpn_rois])
    gt_overlaps = np.zeros((boxes.shape[0], class_nums))
    box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
    if len(gt_boxes) > 0:
        proposal_to_gt_overlaps = bbox_overlaps(boxes, gt_boxes)
        overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
        overlaps_max = proposal_to_gt_overlaps.max(axis=1)
        # Boxes which with non-zero overlap with gt boxes
        overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
        overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
            overlapped_boxes_ind]]
        # Store each box's best overlap in the column of the matched class.
        gt_overlaps[overlapped_boxes_ind,
                    overlapped_boxes_gt_classes] = overlaps_max[
                        overlapped_boxes_ind]
        box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[
            overlapped_boxes_ind]
    # Rows at the positions of crowd ground truth get overlap -1, which
    # keeps them out of both the foreground and the background sets
    # (assuming non-negative thresholds).
    crowd_ind = np.where(is_crowd)[0]
    gt_overlaps[crowd_ind] = -1
    max_overlaps = gt_overlaps.max(axis=1)
    max_classes = gt_overlaps.argmax(axis=1)
    # Cascade RCNN Decode Filter
    if is_cascade_rcnn:
        ws = boxes[:, 2] - boxes[:, 0] + 1
        hs = boxes[:, 3] - boxes[:, 1] + 1
        keep = np.where((ws > 0) & (hs > 0))[0]
        # NOTE(review): boxes is filtered here but max_overlaps, max_classes
        # and box_to_gt_ind_map are not, so the indices below only line up
        # with `boxes` when no box is dropped -- confirm this is intended.
        boxes = boxes[keep]
        fg_inds = np.where(max_overlaps >= fg_thresh)[0]
        bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >=
                                                            bg_thresh_lo))[0]
        # The cascade path keeps every foreground/background box.
        fg_rois_per_this_image = fg_inds.shape[0]
        bg_rois_per_this_image = bg_inds.shape[0]
    else:
        # Foreground
        fg_inds = np.where(max_overlaps >= fg_thresh)[0]
        fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0])
        # Sample foreground if there are too many
        if (fg_inds.shape[0] > fg_rois_per_this_image) and use_random:
            fg_inds = np.random.choice(
                fg_inds, size=fg_rois_per_this_image, replace=False)
        # Without random sampling the first entries are kept.
        fg_inds = fg_inds[:fg_rois_per_this_image]
        # Background
        bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >=
                                                            bg_thresh_lo))[0]
        bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
        bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                            bg_inds.shape[0])
        # Sample background if there are too many
        if (bg_inds.shape[0] > bg_rois_per_this_image) and use_random:
            bg_inds = np.random.choice(
                bg_inds, size=bg_rois_per_this_image, replace=False)
        bg_inds = bg_inds[:bg_rois_per_this_image]
    # Foreground boxes first, then background boxes.
    keep_inds = np.append(fg_inds, bg_inds)
    sampled_labels = max_classes[keep_inds]
    # Everything after the foreground boxes is labelled 0.
    sampled_labels[fg_rois_per_this_image:] = 0
    sampled_boxes = boxes[keep_inds]
    sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
    # Background rows are paired with the first gt box as a placeholder.
    # NOTE(review): gt_boxes[0] fails when gt_boxes is empty.
    sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]
    bbox_label_targets = compute_targets(sampled_boxes, sampled_gts,
                                         sampled_labels, bbox_reg_weights)
    bbox_targets, bbox_inside_weights = expand_bbox_targets(
        bbox_label_targets, class_nums, is_cls_agnostic)
    # Outside weights are 1 wherever the inside weight is positive, else 0.
    bbox_outside_weights = np.array(
        bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
    # Scale rois
    sampled_rois = sampled_boxes * im_scale
    # Faster RCNN blobs
    frcn_blobs = dict(
        rois=sampled_rois,
        labels_int32=sampled_labels,
        bbox_targets=bbox_targets,
        bbox_inside_weights=bbox_inside_weights,
        bbox_outside_weights=bbox_outside_weights)
    return frcn_blobs
import sys
import math
import numpy as np
from paddle.fluid.dygraph.base import to_variable
def roi_pool(input_x, rois, pooled_height, pooled_width, spatial_scale):
    """Max-pool every region of interest to a fixed spatial size (NumPy).

    Args:
        input_x: feature tensor exposing ``.numpy()``; the array is unpacked
            as (batch, channels, height, width).
        rois: tensor exposing ``.numpy()``; indexed as ``rois[b][i]`` with
            ``rois.shape[1]`` regions per image. Each region's first four
            values are read as (start_w, start_h, end_w, end_h) before
            scaling.
        pooled_height: number of output bins along the height.
        pooled_width: number of output bins along the width.
        spatial_scale: factor applied to the region coordinates to map them
            onto the feature map.

    Returns:
        ``to_variable`` of a float32 array shaped
        (batch, rois_num, channels, pooled_height, pooled_width). Bins that
        fall completely outside the feature map are 0.

    Note:
        The window maximum is taken with ``ndarray.max``, so a NaN inside a
        window propagates to the output; the former element-wise comparison
        loop skipped NaNs.
    """
    input_x = input_x.numpy()
    rois = rois.numpy()
    batch_size, channels, height, width = input_x.shape
    rois_num = rois.shape[1]

    outs_list = []
    for bi in range(batch_size):
        x_i = input_x[bi]
        out_data = np.zeros((rois_num, channels, pooled_height, pooled_width))
        for i in range(rois_num):
            roi = rois[bi][i]
            roi_start_w = int(np.round(roi[0] * spatial_scale))
            roi_start_h = int(np.round(roi[1] * spatial_scale))
            roi_end_w = int(np.round(roi[2] * spatial_scale))
            roi_end_h = int(np.round(roi[3] * spatial_scale))
            # Malformed regions are forced to be at least 1x1.
            roi_height = int(max(roi_end_h - roi_start_h + 1, 1))
            roi_width = int(max(roi_end_w - roi_start_w + 1, 1))
            bin_size_h = float(roi_height) / float(pooled_height)
            bin_size_w = float(roi_width) / float(pooled_width)

            for ph in range(pooled_height):
                # Bin bounds along the height, clipped to the feature map.
                hstart = int(math.floor(ph * bin_size_h))
                hend = int(math.ceil((ph + 1) * bin_size_h))
                hstart = min(max(hstart + roi_start_h, 0), height)
                hend = min(max(hend + roi_start_h, 0), height)
                for pw in range(pooled_width):
                    wstart = int(math.floor(pw * bin_size_w))
                    wend = int(math.ceil((pw + 1) * bin_size_w))
                    wstart = min(max(wstart + roi_start_w, 0), width)
                    wend = min(max(wend + roi_start_w, 0), width)
                    if hend <= hstart or wend <= wstart:
                        # Empty bin: keep the zero written by np.zeros.
                        continue
                    # Maximum over the window for all channels at once.
                    out_data[i, :, ph, pw] = x_i[:, hstart:hend,
                                                 wstart:wend].max(axis=(1, 2))
        outs_list.append(out_data.astype('float32'))
    return to_variable(np.asarray(outs_list, dtype=np.float32))
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
def set_paddle_flags(flags):
    """Export each flag as an environment variable unless it is already set.

    Values are converted with ``str()``; variables already present in the
    environment are left untouched so the user's settings win.
    """
    for name, setting in flags.items():
        if name not in os.environ:
            os.environ[name] = str(setting)


# Defaults applied before paddle is imported further down the module.
set_paddle_flags({
    'FLAGS_conv_workspace_size_limit': 500,
    'FLAGS_eager_delete_tensor_gb': 0,  # enable gc
    'FLAGS_memory_fraction_of_eager_deletion': 1,
    'FLAGS_fraction_of_gpu_memory_to_use': 0.98
})
import sys
import numpy as np
import time
import shutil
from utility import parse_args, print_arguments, SmoothedValue, TrainingStats, now_time, check_gpu
import collections
import paddle
import paddle.fluid as fluid
import reader
import models.model_builder as model_builder
import models.resnet as resnet
from learning_rate import exponential_with_warmup_decay
from config import cfg
import dist_utils
# Number of trainer processes, taken from the environment (1 when unset).
num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))


def get_device_num():
    """Return how many devices this process trains on.

    With a single trainer every CUDA device reported by paddle is used.
    """
    if num_trainers <= 1:
        return fluid.core.get_cuda_device_count()
    # NOTE(zcd): for multi-processe training, each process use one GPU card.
    return 1
def train():
    """Build the RCNN training program and run the training loop.

    Steps: seed everything when ``cfg.enable_ce`` is set, build the model
    and its losses, create the warm-up/step learning-rate schedule and a
    Momentum optimizer, optionally load ``cfg.pretrained_model``, choose a
    plain or parallel executor, create the data reader, then train for up to
    ``cfg.max_iter`` iterations. Snapshots are saved every
    ``cfg.TRAIN.snapshot_iter`` iterations and once more at the end.

    In CE mode two "kpis" lines are printed after the loop: the average
    wall-clock time per iteration, measured from the start of the loop, and
    the loss of the last iteration.
    """
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]

    if cfg.enable_ce:
        # Fixed seeds keep continuous-evaluation runs repeatable.
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    if cfg.enable_ce:
        use_random = False
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
        use_random=use_random)
    model.build_model(image_shape)
    losses, keys = model.loss()
    loss = losses[0]
    fetch_list = losses

    # Step schedule: the rate is multiplied by gamma at every boundary.
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    lr = exponential_with_warmup_decay(
        learning_rate=learning_rate,
        boundaries=boundaries,
        values=values,
        warmup_iter=cfg.warm_up_iter,
        warmup_factor=cfg.warm_up_factor)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr,
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)
    # The learning rate is fetched last so it can be logged (outs[-1]).
    fetch_list = fetch_list + [lr]

    for var in fetch_list:
        var.persistable = True

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:

        def if_exist(var):
            # Load only the variables that have a file in the model directory.
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = True

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_iteration_per_drop_scope = 10

        if num_trainers > 1 and cfg.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy,
                                                 fluid.default_main_program())
            # NOTE: the process is fast when num_threads is 1
            # for multi-process training.
            exec_strategy.num_threads = 1

        train_exe = fluid.ParallelExecutor(
            use_cuda=bool(cfg.use_gpu),
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    else:
        train_exe = exe

    shuffle = True
    if cfg.enable_ce:
        shuffle = False
    # NOTE: do not shuffle dataset when using multi-process training
    shuffle_seed = None
    if num_trainers > 1:
        shuffle_seed = 1
    if cfg.use_pyreader:
        train_reader = reader.train(
            batch_size=cfg.TRAIN.im_per_batch,
            total_batch_size=total_batch_size,
            padding_total=cfg.TRAIN.padding_minibatch,
            shuffle=shuffle,
            shuffle_seed=shuffle_seed)
        if num_trainers > 1:
            assert shuffle_seed is not None, \
                "If num_trainers > 1, the shuffle_seed must be set, because " \
                "the order of batch data generated by reader " \
                "must be the same in the respective processes."
        # NOTE: the order of batch data generated by batch_reader
        # must be the same in the respective processes.
        if num_trainers > 1:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        if num_trainers > 1: shuffle = False
        train_reader = reader.train(
            batch_size=total_batch_size, shuffle=shuffle)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def save_model(postfix):
        # Replace any earlier snapshot stored under the same name.
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    def train_loop_pyreader():
        py_reader.start()
        train_stats = TrainingStats(cfg.log_window, keys)
        try:
            start_time = time.time()
            prev_start_time = start_time
            # start_time is overwritten every iteration, so the beginning of
            # the loop is kept separately for the total duration.
            loop_start = start_time
            for iter_id in range(cfg.max_iter):
                prev_start_time = start_time
                start_time = time.time()
                outs = train_exe.run(fetch_list=[v.name for v in fetch_list])
                stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
                train_stats.update(stats)
                logs = train_stats.log()
                strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                    now_time(), iter_id,
                    np.mean(outs[-1]), logs, start_time - prev_start_time)
                print(strs)
                sys.stdout.flush()
                if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                    save_model("model_iter{}".format(iter_id))
            end_time = time.time()
            total_time = end_time - loop_start
            last_loss = np.array(outs[0]).mean()
            if cfg.enable_ce:
                gpu_num = devices_num
                epoch_idx = iter_id + 1
                loss = last_loss
                print("kpis\teach_pass_duration_card%s\t%s" %
                      (gpu_num, total_time / epoch_idx))
                print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, loss))
        except (StopIteration, fluid.core.EOFException):
            # The reader ran out of data before max_iter was reached.
            py_reader.reset()

    def train_loop():
        start_time = time.time()
        prev_start_time = start_time
        # Beginning of the whole loop; start_time is reset per iteration.
        start = start_time
        train_stats = TrainingStats(cfg.log_window, keys)
        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            outs = train_exe.run(fetch_list=[v.name for v in fetch_list],
                                 feed=feeder.feed(data))
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id,
                np.mean(outs[-1]), logs, start_time - prev_start_time)
            print(strs)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        total_time = end_time - start
        last_loss = np.array(outs[0]).mean()
        # only for ce
        if cfg.enable_ce:
            gpu_num = devices_num
            epoch_idx = iter_id + 1
            loss = last_loss
            print("kpis\teach_pass_duration_card%s\t%s" %
                  (gpu_num, total_time / epoch_idx))
            print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, loss))

    if cfg.use_pyreader:
        train_loop_pyreader()
    else:
        train_loop()
    save_model('model_final')
if __name__ == '__main__':
    # Parse and echo the command-line configuration, validate the GPU
    # setting, then start training.
    cli_args = parse_args()
    print_arguments(cli_args)
    check_gpu(cli_args.use_gpu)
    train()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册