Commit 5d4c75ff authored by FlyingQianMM

add hrnet for classifier and faster rcnn

Parent 278994a3
@@ -36,5 +36,6 @@
 DenseNet121 = cv.models.DenseNet121
 DenseNet161 = cv.models.DenseNet161
 DenseNet201 = cv.models.DenseNet201
 ShuffleNetV2 = cv.models.ShuffleNetV2
+HRNet_W18 = cv.models.HRNet_W18
 transforms = cv.transforms.cls_transforms
@@ -34,11 +34,13 @@
 from .classifier import DenseNet121
 from .classifier import DenseNet161
 from .classifier import DenseNet201
 from .classifier import ShuffleNetV2
+from .classifier import HRNet_W18
 from .base import BaseAPI
 from .yolo_v3 import YOLOv3
 from .faster_rcnn import FasterRCNN
 from .mask_rcnn import MaskRCNN
 from .unet import UNet
 from .deeplabv3p import DeepLabv3p
+from .hrnet import HRNet
 from .load_model import load_model
 from .slim import prune
@@ -31,6 +31,8 @@
 from collections import OrderedDict
 from os import path as osp
 from paddle.fluid.framework import Program
 from .utils.pretrain_weights import get_pretrain_weights
+fluid.default_startup_program().random_seed = 1000
+fluid.default_main_program().random_seed = 1000


 def dict2str(dict_input):
@@ -79,9 +81,9 @@ class BaseAPI:
             return int(batch_size // len(self.places))
         else:
-            raise Exception("Please support correct batch_size, \
-                which can be divided by available cards({}) in {}".
-                            format(paddlex.env_info['num'],
-                                   paddlex.env_info['place']))
+            raise Exception("Please support correct batch_size, \
-                which can be divided by available cards({}) in {}"
+                            .format(paddlex.env_info['num'], paddlex.env_info[
+                                'place']))

     def build_program(self):
         # Build the training network
@@ -210,8 +212,8 @@ class BaseAPI:
             paddlex.utils.utils.load_pretrain_weights(
                 self.exe, self.train_prog, resume_checkpoint, resume=True)
             if not osp.exists(osp.join(resume_checkpoint, "model.yml")):
-                raise Exception(
-                    "There's not model.yml in {}".format(resume_checkpoint))
+                raise Exception("There's not model.yml in {}".format(
+                    resume_checkpoint))
             with open(osp.join(resume_checkpoint, "model.yml")) as f:
                 info = yaml.load(f.read(), Loader=yaml.Loader)
             self.completed_epochs = info['completed_epochs']
@@ -269,13 +271,13 @@ class BaseAPI:
             except:
                 pass
-        if hasattr(self.test_transforms, 'to_rgb'):
-            if self.test_transforms.to_rgb:
-                info['TransformsMode'] = 'RGB'
-            else:
-                info['TransformsMode'] = 'BGR'
         if hasattr(self, 'test_transforms'):
+            if hasattr(self.test_transforms, 'to_rgb'):
+                if self.test_transforms.to_rgb:
+                    info['TransformsMode'] = 'RGB'
+                else:
+                    info['TransformsMode'] = 'BGR'
             if self.test_transforms is not None:
                 info['Transforms'] = list()
                 for op in self.test_transforms.transforms:
@@ -362,8 +364,8 @@ class BaseAPI:
         # Marker file indicating the model was saved successfully
         open(osp.join(save_dir, '.success'), 'w').close()
-        logging.info(
-            "Model for inference deploy saved in {}.".format(save_dir))
+        logging.info("Model for inference deploy saved in {}.".format(
+            save_dir))

     def train_loop(self,
                    num_epochs,
@@ -377,7 +379,8 @@ class BaseAPI:
                    early_stop=False,
                    early_stop_patience=5):
         if train_dataset.num_samples < train_batch_size:
-            raise Exception('The amount of training datset must be larger than batch size.')
+            raise Exception(
+                'The amount of training datset must be larger than batch size.')
         if not osp.isdir(save_dir):
             if osp.exists(save_dir):
                 os.remove(save_dir)
@@ -415,8 +418,8 @@ class BaseAPI:
             build_strategy=build_strategy,
             exec_strategy=exec_strategy)
-        total_num_steps = math.floor(
-            train_dataset.num_samples / train_batch_size)
+        total_num_steps = math.floor(train_dataset.num_samples /
+                                     train_batch_size)
         num_steps = 0
         time_stat = list()
         time_train_one_epoch = None
@@ -430,8 +433,8 @@ class BaseAPI:
         if self.model_type == 'detector':
             eval_batch_size = self._get_single_card_bs(train_batch_size)
         if eval_dataset is not None:
-            total_num_steps_eval = math.ceil(
-                eval_dataset.num_samples / eval_batch_size)
+            total_num_steps_eval = math.ceil(eval_dataset.num_samples /
+                                             eval_batch_size)
         if use_vdl:
             # VisualDL component
@@ -473,7 +476,9 @@ class BaseAPI:
                 if use_vdl:
                     for k, v in step_metrics.items():
-                        log_writer.add_scalar('Metrics/Training(Step): {}'.format(k), v, num_steps)
+                        log_writer.add_scalar(
+                            'Metrics/Training(Step): {}'.format(k), v,
+                            num_steps)
                 # Estimate the remaining time
                 avg_step_time = np.mean(time_stat)
@@ -481,11 +486,12 @@ class BaseAPI:
                     eta = (num_epochs - i - 1) * time_train_one_epoch + (
                         total_num_steps - step - 1) * avg_step_time
                 else:
-                    eta = ((num_epochs - i) * total_num_steps - step -
-                           1) * avg_step_time
+                    eta = ((num_epochs - i) * total_num_steps - step - 1
+                           ) * avg_step_time
                 if time_eval_one_epoch is not None:
-                    eval_eta = (total_eval_times - i //
-                                save_interval_epochs) * time_eval_one_epoch
+                    eval_eta = (
+                        total_eval_times - i // save_interval_epochs
+                    ) * time_eval_one_epoch
                 else:
                     eval_eta = (
                         total_eval_times - i // save_interval_epochs
@@ -495,10 +501,11 @@ class BaseAPI:
                 logging.info(
                     "[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}"
                     .format(i + 1, num_epochs, step + 1, total_num_steps,
-                            dict2str(step_metrics), round(
-                                avg_step_time, 2), eta_str))
+                            dict2str(step_metrics),
+                            round(avg_step_time, 2), eta_str))
             train_metrics = OrderedDict(
-                zip(list(self.train_outputs.keys()), np.mean(records, axis=0)))
+                zip(list(self.train_outputs.keys()), np.mean(
+                    records, axis=0)))
             logging.info('[TRAIN] Epoch {} finished, {} .'.format(
                 i + 1, dict2str(train_metrics)))
             time_train_one_epoch = time.time() - epoch_start_time
@@ -534,7 +541,8 @@ class BaseAPI:
                         if isinstance(v, np.ndarray):
                             if v.size > 1:
                                 continue
-                        log_writer.add_scalar("Metrics/Eval(Epoch): {}".format(k), v, i+1)
+                        log_writer.add_scalar(
+                            "Metrics/Eval(Epoch): {}".format(k), v, i + 1)
                 self.save_model(save_dir=current_save_dir)
                 time_eval_one_epoch = time.time() - eval_epoch_start_time
                 eval_epoch_start_time = time.time()
@@ -545,4 +553,4 @@ class BaseAPI:
                         best_accuracy))
             if eval_dataset is not None and early_stop:
                 if earlystop(current_accuracy):
-                    break
\ No newline at end of file
+                    break
@@ -40,8 +40,8 @@ class BaseClassifier(BaseAPI):
         self.init_params = locals()
         super(BaseClassifier, self).__init__('classifier')
         if not hasattr(paddlex.cv.nets, str.lower(model_name)):
-            raise Exception(
-                "ERROR: There's no model named {}.".format(model_name))
+            raise Exception("ERROR: There's no model named {}.".format(
+                model_name))
         self.model_name = model_name
         self.labels = None
         self.num_classes = num_classes
@@ -218,15 +218,14 @@ class BaseClassifier(BaseAPI):
                 num_pad_samples = batch_size - num_samples
                 pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
                 images = np.concatenate([images, pad_images])
-            outputs = self.exe.run(
-                self.parallel_test_prog,
-                feed={'image': images},
-                fetch_list=list(self.test_outputs.values()))
+            outputs = self.exe.run(self.parallel_test_prog,
+                                   feed={'image': images},
+                                   fetch_list=list(self.test_outputs.values()))
             outputs = [outputs[0][:num_samples]]
             true_labels.extend(labels)
             pred_scores.extend(outputs[0].tolist())
-            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-                epoch_id, step + 1, total_steps))
+            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                               1, total_steps))
         pred_top1_label = np.argsort(pred_scores)[:, -1]
         pred_topk_label = np.argsort(pred_scores)[:, -k:]
@@ -263,10 +262,9 @@ class BaseClassifier(BaseAPI):
         self.arrange_transforms(
             transforms=self.test_transforms, mode='test')
         im = self.test_transforms(img_file)
-        result = self.exe.run(
-            self.test_prog,
-            feed={'image': im},
-            fetch_list=list(self.test_outputs.values()))
+        result = self.exe.run(self.test_prog,
+                              feed={'image': im},
+                              fetch_list=list(self.test_outputs.values()))
         pred_label = np.argsort(result[0][0])[::-1][:true_topk]
         res = [{
             'category_id': l,
@@ -400,3 +398,9 @@ class ShuffleNetV2(BaseClassifier):
     def __init__(self, num_classes=1000):
         super(ShuffleNetV2, self).__init__(
             model_name='ShuffleNetV2', num_classes=num_classes)
+
+
+class HRNet_W18(BaseClassifier):
+    def __init__(self, num_classes=1000):
+        super(HRNet_W18, self).__init__(
+            model_name='HRNet_W18', num_classes=num_classes)
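With this hunk, HRNet_W18 behaves like any other PaddleX classifier. A minimal training sketch, assuming the PaddleX 1.x API used elsewhere in this repo (dataset paths and hyperparameters are placeholders, not taken from the commit):

```python
import paddlex as pdx
from paddlex.cls import transforms

# Placeholder ImageNet-style dataset; substitute your own file lists.
train_transforms = transforms.Compose(
    [transforms.RandomCrop(crop_size=224), transforms.Normalize()])
train_dataset = pdx.datasets.ImageNet(
    data_dir='my_dataset',
    file_list='my_dataset/train_list.txt',
    label_list='my_dataset/labels.txt',
    transforms=train_transforms)

# The new classifier; ImageNet weights resolve via the image_pretrain entry below.
model = pdx.cls.HRNet_W18(num_classes=len(train_dataset.labels))
model.train(
    num_epochs=10,
    train_dataset=train_dataset,
    train_batch_size=32,
    save_dir='output/hrnet_w18')
```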
@@ -32,7 +32,7 @@ class FasterRCNN(BaseAPI):
     Args:
         num_classes (int): Number of classes, background included. Defaults to 81.
         backbone (str): Backbone network of FasterRCNN, one of ['ResNet18', 'ResNet50',
-            'ResNet50_vd', 'ResNet101', 'ResNet101_vd']. Defaults to 'ResNet50'.
+            'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']. Defaults to 'ResNet50'.
         with_fpn (bool): Whether to use the FPN structure. Defaults to True.
         aspect_ratios (list): Candidate aspect ratios for anchor generation. Defaults to [0.5, 1.0, 2.0].
         anchor_sizes (list): Candidate sizes for anchor generation. Defaults to [32, 64, 128, 256, 512].
@@ -47,7 +47,8 @@ class FasterRCNN(BaseAPI):
         self.init_params = locals()
         super(FasterRCNN, self).__init__('detector')
         backbones = [
-            'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd'
+            'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd',
+            'HRNet_W18'
         ]
         assert backbone in backbones, "backbone should be one of {}".format(
             backbones)
@@ -79,6 +80,12 @@ class FasterRCNN(BaseAPI):
             layers = 101
             variant = 'd'
             norm_type = 'affine_channel'
+        elif backbone_name == 'HRNet_W18':
+            backbone = paddlex.cv.nets.hrnet.HRNet(
+                width=18, freeze_norm=True, norm_decay=0., freeze_at=0)
+            if self.with_fpn is False:
+                self.with_fpn = True
+            return backbone
         if self.with_fpn:
             backbone = paddlex.cv.nets.resnet.ResNet(
                 norm_type='bn' if norm_type is None else norm_type,
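Note the early return: for HRNet_W18 the builder silently forces with_fpn to True, because the HRNet path is only wired up through HRFPN (see the fpn.py hunk further down). Switching an existing setup to the new backbone is then a one-argument change; a sketch assuming the paddlex.det aliases mirror the paddlex.cls ones above:

```python
import paddlex as pdx

# with_fpn is forced to True internally when backbone='HRNet_W18'.
model = pdx.det.FasterRCNN(num_classes=81, backbone='HRNet_W18')
```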
@@ -117,12 +124,12 @@ class FasterRCNN(BaseAPI):
             model_out = model.build_net(inputs)
             loss = model_out['loss']
             self.optimizer.minimize(loss)
-            outputs = OrderedDict([('loss', model_out['loss']),
-                                   ('loss_cls', model_out['loss_cls']),
-                                   ('loss_bbox', model_out['loss_bbox']),
-                                   ('loss_rpn_cls', model_out['loss_rpn_cls']),
-                                   ('loss_rpn_bbox',
-                                    model_out['loss_rpn_bbox'])])
+            outputs = OrderedDict(
+                [('loss', model_out['loss']),
+                 ('loss_cls', model_out['loss_cls']),
+                 ('loss_bbox', model_out['loss_bbox']),
+                 ('loss_rpn_cls', model_out['loss_rpn_cls']), (
+                     'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
         else:
             outputs = model.build_net(inputs)
         return inputs, outputs
@@ -227,7 +234,9 @@ class FasterRCNN(BaseAPI):
         # Build the training, evaluation and test networks
         self.build_program()
         fuse_bn = True
-        if self.with_fpn and self.backbone in ['ResNet18', 'ResNet50']:
+        if self.with_fpn and self.backbone in [
+                'ResNet18', 'ResNet50', 'HRNet_W18'
+        ]:
             fuse_bn = False
         self.net_initialize(
             startup_prog=fluid.default_startup_program(),
'im_info': im_infos, 'im_info': im_infos,
'im_shape': im_shapes, 'im_shape': im_shapes,
} }
outputs = self.exe.run( outputs = self.exe.run(self.test_prog,
self.test_prog, feed=[feed_data],
feed=[feed_data], fetch_list=list(self.test_outputs.values()),
fetch_list=list(self.test_outputs.values()), return_numpy=False)
return_numpy=False)
res = { res = {
'bbox': (np.array(outputs[0]), 'bbox': (np.array(outputs[0]),
outputs[0].recursive_sequence_lengths()) outputs[0].recursive_sequence_lengths())
@@ -339,13 +347,13 @@ class FasterRCNN(BaseAPI):
                 res['is_difficult'] = (np.array(res_is_difficult),
                                        [res_is_difficult_lod])
             results.append(res)
-            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-                epoch_id, step + 1, total_steps))
+            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                               1, total_steps))
         box_ap_stats, eval_details = eval_results(
             results, metric, eval_dataset.coco_gt, with_background=True)
         metrics = OrderedDict(
-            zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
-                box_ap_stats))
+            zip(['bbox_mmap'
+                 if metric == 'COCO' else 'bbox_map'], box_ap_stats))
         if return_details:
             return metrics, eval_details
         return metrics
@@ -373,15 +381,14 @@ class FasterRCNN(BaseAPI):
         im = np.expand_dims(im, axis=0)
         im_resize_info = np.expand_dims(im_resize_info, axis=0)
         im_shape = np.expand_dims(im_shape, axis=0)
-        outputs = self.exe.run(
-            self.test_prog,
-            feed={
-                'image': im,
-                'im_info': im_resize_info,
-                'im_shape': im_shape
-            },
-            fetch_list=list(self.test_outputs.values()),
-            return_numpy=False)
+        outputs = self.exe.run(self.test_prog,
+                               feed={
+                                   'image': im,
+                                   'im_info': im_resize_info,
+                                   'im_shape': im_shape
+                               },
+                               fetch_list=list(self.test_outputs.values()),
+                               return_numpy=False)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)
......
@@ -32,7 +32,7 @@ class MaskRCNN(FasterRCNN):
     Args:
         num_classes (int): Number of classes, background included. Defaults to 81.
         backbone (str): Backbone network of MaskRCNN, one of ['ResNet18', 'ResNet50',
-            'ResNet50_vd', 'ResNet101', 'ResNet101_vd']. Defaults to 'ResNet50'.
+            'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']. Defaults to 'ResNet50'.
         with_fpn (bool): Whether to use the FPN structure. Defaults to True.
         aspect_ratios (list): Candidate aspect ratios for anchor generation. Defaults to [0.5, 1.0, 2.0].
         anchor_sizes (list): Candidate sizes for anchor generation. Defaults to [32, 64, 128, 256, 512].
@@ -46,7 +46,8 @@ class MaskRCNN(FasterRCNN):
                  anchor_sizes=[32, 64, 128, 256, 512]):
         self.init_params = locals()
         backbones = [
-            'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd'
+            'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd',
+            'HRNet_W18'
         ]
         assert backbone in backbones, "backbone should be one of {}".format(
             backbones)
@@ -81,13 +82,13 @@ class MaskRCNN(FasterRCNN):
             model_out = model.build_net(inputs)
             loss = model_out['loss']
             self.optimizer.minimize(loss)
-            outputs = OrderedDict([('loss', model_out['loss']),
-                                   ('loss_cls', model_out['loss_cls']),
-                                   ('loss_bbox', model_out['loss_bbox']),
-                                   ('loss_mask', model_out['loss_mask']),
-                                   ('loss_rpn_cls', model_out['loss_rpn_cls']),
-                                   ('loss_rpn_bbox',
-                                    model_out['loss_rpn_bbox'])])
+            outputs = OrderedDict(
+                [('loss', model_out['loss']),
+                 ('loss_cls', model_out['loss_cls']),
+                 ('loss_bbox', model_out['loss_bbox']),
+                 ('loss_mask', model_out['loss_mask']),
+                 ('loss_rpn_cls', model_out['loss_rpn_cls']), (
+                     'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
         else:
             outputs = model.build_net(inputs)
         return inputs, outputs
@@ -276,11 +277,10 @@ class MaskRCNN(FasterRCNN):
                 'im_info': im_infos,
                 'im_shape': im_shapes,
             }
-            outputs = self.exe.run(
-                self.test_prog,
-                feed=[feed_data],
-                fetch_list=list(self.test_outputs.values()),
-                return_numpy=False)
+            outputs = self.exe.run(self.test_prog,
+                                   feed=[feed_data],
+                                   fetch_list=list(self.test_outputs.values()),
+                                   return_numpy=False)
             res = {
                 'bbox': (np.array(outputs[0]),
                          outputs[0].recursive_sequence_lengths()),
@@ -292,8 +292,8 @@ class MaskRCNN(FasterRCNN):
             res['im_shape'] = (im_shapes, [])
             res['im_id'] = (np.array(res_im_id), [])
             results.append(res)
-            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-                epoch_id, step + 1, total_steps))
+            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                               1, total_steps))
         ap_stats, eval_details = eval_results(
             results,
@@ -302,8 +302,8 @@ class MaskRCNN(FasterRCNN):
             with_background=True,
             resolution=self.mask_head_resolution)
         if metric == 'VOC':
-            if isinstance(ap_stats[0], np.ndarray) and isinstance(
-                    ap_stats[1], np.ndarray):
+            if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
+                                                                  np.ndarray):
                 metrics = OrderedDict(
                     zip(['bbox_map', 'segm_map'],
                         [ap_stats[0][1], ap_stats[1][1]]))
@@ -311,8 +311,8 @@ class MaskRCNN(FasterRCNN):
                 metrics = OrderedDict(
                     zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
         elif metric == 'COCO':
-            if isinstance(ap_stats[0], np.ndarray) and isinstance(
-                    ap_stats[1], np.ndarray):
+            if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
+                                                                  np.ndarray):
                 metrics = OrderedDict(
                     zip(['bbox_mmap', 'segm_mmap'],
                         [ap_stats[0][0], ap_stats[1][0]]))
@@ -346,15 +346,14 @@ class MaskRCNN(FasterRCNN):
         im = np.expand_dims(im, axis=0)
         im_resize_info = np.expand_dims(im_resize_info, axis=0)
         im_shape = np.expand_dims(im_shape, axis=0)
-        outputs = self.exe.run(
-            self.test_prog,
-            feed={
-                'image': im,
-                'im_info': im_resize_info,
-                'im_shape': im_shape
-            },
-            fetch_list=list(self.test_outputs.values()),
-            return_numpy=False)
+        outputs = self.exe.run(self.test_prog,
+                               feed={
+                                   'image': im,
+                                   'im_info': im_resize_info,
+                                   'im_shape': im_shape
+                               },
+                               fetch_list=list(self.test_outputs.values()),
+                               return_numpy=False)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)
@@ -368,8 +367,8 @@ class MaskRCNN(FasterRCNN):
         import pycocotools.mask as mask_util
         for index, xywh_res in enumerate(xywh_results):
             del xywh_res['image_id']
-            xywh_res['mask'] = mask_util.decode(
-                segm_results[index]['segmentation'])
+            xywh_res['mask'] = mask_util.decode(segm_results[index][
+                'segmentation'])
             xywh_res['category'] = self.labels[xywh_res['category_id']]
             results.append(xywh_res)
         return results
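The MaskRCNN hunks mirror the FasterRCNN ones, so the same one-argument backbone switch applies there too (again a sketch, not taken from the commit):

```python
import paddlex as pdx

model = pdx.det.MaskRCNN(num_classes=81, backbone='HRNet_W18')
```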
@@ -56,6 +56,8 @@ image_pretrain = {
     'https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar',
     'ShuffleNetV2':
     'https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar',
+    'HRNet_W18':
+    'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar',
 }

 coco_pretrain = {
@@ -85,10 +87,11 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
             backbone = 'DetResNet50'
         assert backbone in image_pretrain, "There is not ImageNet pretrain weights for {}, you may try COCO.".format(
             backbone)
-        # url = image_pretrain[backbone]
-        # fname = osp.split(url)[-1].split('.')[0]
-        # paddlex.utils.download_and_decompress(url, path=new_save_dir)
-        # return osp.join(new_save_dir, fname)
+        if backbone == "HRNet_W18":
+            url = image_pretrain[backbone]
+            fname = osp.split(url)[-1].split('.')[0]
+            paddlex.utils.download_and_decompress(url, path=new_save_dir)
+            return osp.join(new_save_dir, fname)
         try:
             hub.download(backbone, save_path=new_save_dir)
         except Exception as e:
......
@@ -23,6 +23,7 @@ from .segmentation import DeepLabv3p
 from .xception import Xception
 from .densenet import DenseNet
 from .shufflenet_v2 import ShuffleNetV2
+from .hrnet import HRNet


 def resnet18(input, num_classes=1000):
@@ -51,14 +52,20 @@ def resnet50_vd(input, num_classes=1000):


 def resnet50_vd_ssld(input, num_classes=1000):
-    model = ResNet(layers=50, num_classes=num_classes,
-                   variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
+    model = ResNet(
+        layers=50,
+        num_classes=num_classes,
+        variant='d',
+        lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
     return model(input)


 def resnet101_vd_ssld(input, num_classes=1000):
-    model = ResNet(layers=101, num_classes=num_classes,
-                   variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
+    model = ResNet(
+        layers=101,
+        num_classes=num_classes,
+        variant='d',
+        lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
     return model(input)
@@ -93,14 +100,18 @@ def mobilenetv3_large(input, num_classes=1000):


 def mobilenetv3_small_ssld(input, num_classes=1000):
-    model = MobileNetV3(num_classes=num_classes, model_name='small',
-                        lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
+    model = MobileNetV3(
+        num_classes=num_classes,
+        model_name='small',
+        lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
     return model(input)


 def mobilenetv3_large_ssld(input, num_classes=1000):
-    model = MobileNetV3(num_classes=num_classes, model_name='large',
-                        lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
+    model = MobileNetV3(
+        num_classes=num_classes,
+        model_name='large',
+        lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
     return model(input)
@@ -133,6 +144,12 @@ def densenet201(input, num_classes=1000):
     model = DenseNet(layers=201, num_classes=num_classes)
     return model(input)


 def shufflenetv2(input, num_classes=1000):
     model = ShuffleNetV2(num_classes=num_classes)
     return model(input)
+
+
+def hrnet_w18(input, num_classes=1000):
+    model = HRNet(width=18, num_classes=num_classes)
+    return model(input)
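Like its siblings above, the new hrnet_w18 factory builds the network directly on a static-graph variable, which is how BaseClassifier resolves it via str.lower(model_name). A minimal sketch (the input name and shape are illustrative):

```python
import paddle.fluid as fluid
import paddlex

# Build an HRNet-W18 classification head on a fluid input variable.
image = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32')
out = paddlex.cv.nets.hrnet_w18(image, num_classes=1000)
```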
@@ -21,7 +21,7 @@ import copy
 from paddle import fluid

-from .fpn import FPN
+from .fpn import (FPN, HRFPN)
 from .rpn_head import (RPNHead, FPNRPNHead)
 from .roi_extractor import (RoIAlign, FPNRoIAlign)
 from .bbox_head import (BBoxHead, TwoFCHead)
@@ -82,7 +82,12 @@ class FasterRCNN(object):
         self.backbone = backbone
         self.mode = mode
         if with_fpn and fpn is None:
-            fpn = FPN()
+            if self.backbone.__class__.__name__.startswith('HRNet'):
+                fpn = HRFPN()
+                fpn.min_level = 2
+                fpn.max_level = 6
+            else:
+                fpn = FPN()
         self.fpn = fpn
         self.num_classes = num_classes
         if rpn_head is None:
......
@@ -23,7 +23,7 @@ from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.initializer import Xavier
 from paddle.fluid.regularizer import L2Decay

-__all__ = ['FPN']
+__all__ = ['FPN', 'HRFPN']


 def ConvNorm(input,
@@ -219,8 +219,8 @@ class FPN(object):
             body_name = body_name_list[i]
             body_input = body_dict[body_name]
             top_output = self.fpn_inner_output[i - 1]
-            fpn_inner_single = self._add_topdown_lateral(
-                body_name, body_input, top_output)
+            fpn_inner_single = self._add_topdown_lateral(body_name, body_input,
+                                                         top_output)
             self.fpn_inner_output[i] = fpn_inner_single
         fpn_dict = {}
         fpn_name_list = []
@@ -293,3 +293,107 @@ class FPN(object):
             spatial_scale.insert(0, spatial_scale[0] * 0.5)
         res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
         return res_dict, spatial_scale
+
+
+class HRFPN(object):
+    """
+    HRNet, see https://arxiv.org/abs/1908.07919
+
+    Args:
+        num_chan (int): number of feature channels
+        pooling_type (str): pooling type of downsampling
+        share_conv (bool): whether to share conv for different layers' reduction
+        spatial_scale (list): feature map scaling factor
+    """
+
+    def __init__(
+            self,
+            num_chan=256,
+            pooling_type="avg",
+            share_conv=False,
+            spatial_scale=[1. / 64, 1. / 32, 1. / 16, 1. / 8, 1. / 4], ):
+        self.num_chan = num_chan
+        self.pooling_type = pooling_type
+        self.share_conv = share_conv
+        self.spatial_scale = spatial_scale
+
+    def get_output(self, body_dict):
+        num_out = len(self.spatial_scale)
+        body_name_list = list(body_dict.keys())
+        num_backbone_stages = len(body_name_list)
+        outs = []
+        outs.append(body_dict[body_name_list[0]])
+        # resize
+        for i in range(1, len(body_dict)):
+            resized = self.resize_input_tensor(body_dict[body_name_list[i]],
+                                               outs[0], 2**i)
+            outs.append(resized)
+        # concat
+        out = fluid.layers.concat(outs, axis=1)
+        # reduction
+        out = fluid.layers.conv2d(
+            input=out,
+            num_filters=self.num_chan,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            param_attr=ParamAttr(name='hrfpn_reduction_weights'),
+            bias_attr=False)
+        # conv
+        outs = [out]
+        for i in range(1, num_out):
+            outs.append(
+                self.pooling(
+                    out,
+                    size=2**i,
+                    stride=2**i,
+                    pooling_type=self.pooling_type))
+        outputs = []
+        for i in range(num_out):
+            conv_name = "shared_fpn_conv" if self.share_conv else "shared_fpn_conv_" + str(
+                i)
+            conv = fluid.layers.conv2d(
+                input=outs[i],
+                num_filters=self.num_chan,
+                filter_size=3,
+                stride=1,
+                padding=1,
+                param_attr=ParamAttr(name=conv_name + "_weights"),
+                bias_attr=False)
+            outputs.append(conv)
+        for idx in range(0, num_out - len(body_name_list)):
+            body_name_list.append("fpn_res5_sum_subsampled_{}x".format(2**(
+                idx + 1)))
+        outputs = outputs[::-1]
+        body_name_list = body_name_list[::-1]
+        res_dict = OrderedDict([(body_name_list[k], outputs[k])
+                                for k in range(len(body_name_list))])
+        return res_dict, self.spatial_scale
+
+    def resize_input_tensor(self, body_input, ref_output, scale):
+        shape = fluid.layers.shape(ref_output)
+        shape_hw = fluid.layers.slice(shape, axes=[0], starts=[2], ends=[4])
+        out_shape_ = shape_hw
+        out_shape = fluid.layers.cast(out_shape_, dtype='int32')
+        out_shape.stop_gradient = True
+        body_output = fluid.layers.resize_bilinear(
+            body_input, scale=scale, out_shape=out_shape)
+        return body_output
+
+    def pooling(self, input, size, stride, pooling_type):
+        pool = fluid.layers.pool2d(
+            input=input,
+            pool_size=size,
+            pool_stride=stride,
+            pool_type=pooling_type)
+        return pool
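Unlike the top-down FPN above, HRFPN first brings every backbone stage up to the highest resolution, concatenates them, reduces channels with a 1x1 conv, and then re-derives the lower pyramid levels by pooling. A toy invocation of the class as defined above, with hypothetical stage names and channel widths standing in for real HRNet outputs:

```python
from collections import OrderedDict
import paddle.fluid as fluid

# Fake multi-resolution features at strides 4/8/16/32; HRNet would supply these.
image = fluid.data(name='image', shape=[None, 3, 512, 512], dtype='float32')
body_dict = OrderedDict()
feat = image
for i, name in enumerate(['stage4', 'stage8', 'stage16', 'stage32']):
    feat = fluid.layers.conv2d(
        feat, num_filters=18 * 2**i, filter_size=3,
        stride=4 if i == 0 else 2, padding=1)
    body_dict[name] = feat

fpn = HRFPN()  # defaults: 256 channels, avg pooling, 5 output scales
res_dict, spatial_scale = fpn.get_output(body_dict)  # 5 pyramid levels
```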
@@ -21,7 +21,7 @@ import copy
 import paddle.fluid as fluid

-from .fpn import FPN
+from .fpn import (FPN, HRFPN)
 from .rpn_head import (RPNHead, FPNRPNHead)
 from .roi_extractor import (RoIAlign, FPNRoIAlign)
 from .bbox_head import (BBoxHead, TwoFCHead)
@@ -92,11 +92,15 @@ class MaskRCNN(object):
         self.backbone = backbone
         self.mode = mode
         if with_fpn and fpn is None:
-            fpn = FPN(
-                num_chan=num_chan,
-                min_level=min_level,
-                max_level=max_level,
-                spatial_scale=spatial_scale)
+            if self.backbone.__class__.__name__.startswith('HRNet'):
+                fpn = HRFPN()
+                fpn.min_level = 2
+                fpn.max_level = 6
+            else:
+                fpn = FPN(num_chan=num_chan,
+                          min_level=min_level,
+                          max_level=max_level,
+                          spatial_scale=spatial_scale)
         self.fpn = fpn
         self.num_classes = num_classes
         if rpn_head is None:
......