Commit 5d4c75ff authored by FlyingQianMM

add hrnet for classifier and faster rcnn

Parent 278994a3
......@@ -36,5 +36,6 @@ DenseNet121 = cv.models.DenseNet121
DenseNet161 = cv.models.DenseNet161
DenseNet201 = cv.models.DenseNet201
ShuffleNetV2 = cv.models.ShuffleNetV2
HRNet_W18 = cv.models.HRNet_W18
transforms = cv.transforms.cls_transforms
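With the alias above, the new classifier should be reachable from the top-level classification namespace. A minimal sketch, assuming the standard PaddleX 1.x import style:

```python
import paddlex as pdx

# HRNet_W18 now sits alongside the other classifier aliases; the
# classification transforms remain under pdx.cls.transforms as before.
model = pdx.cls.HRNet_W18(num_classes=1000)
print(type(model).__name__)  # 'HRNet_W18'
```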
......@@ -34,11 +34,13 @@ from .classifier import DenseNet121
from .classifier import DenseNet161
from .classifier import DenseNet201
from .classifier import ShuffleNetV2
from .classifier import HRNet_W18
from .base import BaseAPI
from .yolo_v3 import YOLOv3
from .faster_rcnn import FasterRCNN
from .mask_rcnn import MaskRCNN
from .unet import UNet
from .deeplabv3p import DeepLabv3p
from .hrnet import HRNet
from .load_model import load_model
from .slim import prune
......@@ -31,6 +31,8 @@ from collections import OrderedDict
from os import path as osp
from paddle.fluid.framework import Program
from .utils.pretrain_weights import get_pretrain_weights
fluid.default_startup_program().random_seed = 1000
fluid.default_main_program().random_seed = 1000
def dict2str(dict_input):
......@@ -79,9 +81,9 @@ class BaseAPI:
return int(batch_size // len(self.places))
else:
raise Exception("Please support correct batch_size, \
which can be divided by available cards({}) in {}".
format(paddlex.env_info['num'],
paddlex.env_info['place']))
which can be divided by available cards({}) in {}"
.format(paddlex.env_info['num'], paddlex.env_info[
'place']))
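The check above splits the global batch size across the visible devices; a tiny illustration of the arithmetic (the card count is made up):

```python
# Illustrative only: 32 samples on 4 cards gives 8 per card; a batch size
# that does not divide evenly by the card count triggers the exception above.
train_batch_size, num_cards = 32, 4
assert train_batch_size % num_cards == 0
per_card_bs = train_batch_size // num_cards  # 8
```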
def build_program(self):
# Build the training network
......@@ -210,8 +212,8 @@ class BaseAPI:
paddlex.utils.utils.load_pretrain_weights(
self.exe, self.train_prog, resume_checkpoint, resume=True)
if not osp.exists(osp.join(resume_checkpoint, "model.yml")):
raise Exception(
"There's not model.yml in {}".format(resume_checkpoint))
raise Exception("There's not model.yml in {}".format(
resume_checkpoint))
with open(osp.join(resume_checkpoint, "model.yml")) as f:
info = yaml.load(f.read(), Loader=yaml.Loader)
self.completed_epochs = info['completed_epochs']
......@@ -269,13 +271,13 @@ class BaseAPI:
except:
pass
if hasattr(self.test_transforms, 'to_rgb'):
if self.test_transforms.to_rgb:
info['TransformsMode'] = 'RGB'
else:
info['TransformsMode'] = 'BGR'
if hasattr(self, 'test_transforms'):
if hasattr(self.test_transforms, 'to_rgb'):
if self.test_transforms.to_rgb:
info['TransformsMode'] = 'RGB'
else:
info['TransformsMode'] = 'BGR'
if self.test_transforms is not None:
info['Transforms'] = list()
for op in self.test_transforms.transforms:
......@@ -362,8 +364,8 @@ class BaseAPI:
# Marker that the model was saved successfully
open(osp.join(save_dir, '.success'), 'w').close()
logging.info(
"Model for inference deploy saved in {}.".format(save_dir))
logging.info("Model for inference deploy saved in {}.".format(
save_dir))
def train_loop(self,
num_epochs,
......@@ -377,7 +379,8 @@ class BaseAPI:
early_stop=False,
early_stop_patience=5):
if train_dataset.num_samples < train_batch_size:
raise Exception('The amount of training datset must be larger than batch size.')
raise Exception(
'The amount of training datset must be larger than batch size.')
if not osp.isdir(save_dir):
if osp.exists(save_dir):
os.remove(save_dir)
......@@ -415,8 +418,8 @@ class BaseAPI:
build_strategy=build_strategy,
exec_strategy=exec_strategy)
total_num_steps = math.floor(
train_dataset.num_samples / train_batch_size)
total_num_steps = math.floor(train_dataset.num_samples /
train_batch_size)
num_steps = 0
time_stat = list()
time_train_one_epoch = None
......@@ -430,8 +433,8 @@ class BaseAPI:
if self.model_type == 'detector':
eval_batch_size = self._get_single_card_bs(train_batch_size)
if eval_dataset is not None:
total_num_steps_eval = math.ceil(
eval_dataset.num_samples / eval_batch_size)
total_num_steps_eval = math.ceil(eval_dataset.num_samples /
eval_batch_size)
if use_vdl:
# VisualDL component
......@@ -473,7 +476,9 @@ class BaseAPI:
if use_vdl:
for k, v in step_metrics.items():
log_writer.add_scalar('Metrics/Training(Step): {}'.format(k), v, num_steps)
log_writer.add_scalar(
'Metrics/Training(Step): {}'.format(k), v,
num_steps)
# Estimate the remaining time
avg_step_time = np.mean(time_stat)
......@@ -481,11 +486,12 @@ class BaseAPI:
eta = (num_epochs - i - 1) * time_train_one_epoch + (
total_num_steps - step - 1) * avg_step_time
else:
eta = ((num_epochs - i) * total_num_steps - step -
1) * avg_step_time
eta = ((num_epochs - i) * total_num_steps - step - 1
) * avg_step_time
if time_eval_one_epoch is not None:
eval_eta = (total_eval_times - i //
save_interval_epochs) * time_eval_one_epoch
eval_eta = (
total_eval_times - i // save_interval_epochs
) * time_eval_one_epoch
else:
eval_eta = (
total_eval_times - i // save_interval_epochs
......@@ -495,10 +501,11 @@ class BaseAPI:
logging.info(
"[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}"
.format(i + 1, num_epochs, step + 1, total_num_steps,
dict2str(step_metrics), round(
avg_step_time, 2), eta_str))
dict2str(step_metrics),
round(avg_step_time, 2), eta_str))
train_metrics = OrderedDict(
zip(list(self.train_outputs.keys()), np.mean(records, axis=0)))
zip(list(self.train_outputs.keys()), np.mean(
records, axis=0)))
logging.info('[TRAIN] Epoch {} finished, {} .'.format(
i + 1, dict2str(train_metrics)))
time_train_one_epoch = time.time() - epoch_start_time
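For reference, the remaining-time estimate reformatted above combines the measured per-epoch time with the running mean of step times; a small worked sketch with made-up numbers:

```python
# Hypothetical values, mirroring the eta formula in train_loop above.
num_epochs, total_num_steps = 12, 400
i, step = 3, 99                    # current epoch / step indices (0-based)
avg_step_time = 0.5                # seconds, np.mean(time_stat)
time_train_one_epoch = 210.0       # seconds, measured for the previous epoch

eta = (num_epochs - i - 1) * time_train_one_epoch + \
      (total_num_steps - step - 1) * avg_step_time
print(round(eta, 1))  # seconds left: 8 full epochs plus the rest of this one
```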
......@@ -534,7 +541,8 @@ class BaseAPI:
if isinstance(v, np.ndarray):
if v.size > 1:
continue
log_writer.add_scalar("Metrics/Eval(Epoch): {}".format(k), v, i+1)
log_writer.add_scalar(
"Metrics/Eval(Epoch): {}".format(k), v, i + 1)
self.save_model(save_dir=current_save_dir)
time_eval_one_epoch = time.time() - eval_epoch_start_time
eval_epoch_start_time = time.time()
......@@ -545,4 +553,4 @@ class BaseAPI:
best_accuracy))
if eval_dataset is not None and early_stop:
if earlystop(current_accuracy):
break
\ No newline at end of file
break
......@@ -40,8 +40,8 @@ class BaseClassifier(BaseAPI):
self.init_params = locals()
super(BaseClassifier, self).__init__('classifier')
if not hasattr(paddlex.cv.nets, str.lower(model_name)):
raise Exception(
"ERROR: There's no model named {}.".format(model_name))
raise Exception("ERROR: There's no model named {}.".format(
model_name))
self.model_name = model_name
self.labels = None
self.num_classes = num_classes
......@@ -218,15 +218,14 @@ class BaseClassifier(BaseAPI):
num_pad_samples = batch_size - num_samples
pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
images = np.concatenate([images, pad_images])
outputs = self.exe.run(
self.parallel_test_prog,
feed={'image': images},
fetch_list=list(self.test_outputs.values()))
outputs = self.exe.run(self.parallel_test_prog,
feed={'image': images},
fetch_list=list(self.test_outputs.values()))
outputs = [outputs[0][:num_samples]]
true_labels.extend(labels)
pred_scores.extend(outputs[0].tolist())
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
epoch_id, step + 1, total_steps))
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
1, total_steps))
pred_top1_label = np.argsort(pred_scores)[:, -1]
pred_topk_label = np.argsort(pred_scores)[:, -k:]
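The evaluation loop above pads the final, partially filled batch by tiling the first image so the parallel program always sees a full batch, then truncates the outputs. A numpy-only illustration (shapes are made up):

```python
import numpy as np

images = np.random.rand(3, 3, 224, 224).astype('float32')  # only 3 real samples
batch_size = 8
num_pad_samples = batch_size - images.shape[0]
pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
images = np.concatenate([images, pad_images])
assert images.shape[0] == batch_size
# after exe.run, keeping outputs[0][:3] drops the padded predictions again
```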
......@@ -263,10 +262,9 @@ class BaseClassifier(BaseAPI):
self.arrange_transforms(
transforms=self.test_transforms, mode='test')
im = self.test_transforms(img_file)
result = self.exe.run(
self.test_prog,
feed={'image': im},
fetch_list=list(self.test_outputs.values()))
result = self.exe.run(self.test_prog,
feed={'image': im},
fetch_list=list(self.test_outputs.values()))
pred_label = np.argsort(result[0][0])[::-1][:true_topk]
res = [{
'category_id': l,
......@@ -400,3 +398,9 @@ class ShuffleNetV2(BaseClassifier):
def __init__(self, num_classes=1000):
super(ShuffleNetV2, self).__init__(
model_name='ShuffleNetV2', num_classes=num_classes)
class HRNet_W18(BaseClassifier):
def __init__(self, num_classes=1000):
super(HRNet_W18, self).__init__(
model_name='HRNet_W18', num_classes=num_classes)
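With the wrapper above, HRNet_W18 can presumably be trained through the same high-level classification API as the other backbones. A minimal sketch, assuming the PaddleX 1.x ImageNet-style dataset layout (paths and hyperparameters are placeholders):

```python
import paddlex as pdx
from paddlex.cls import transforms

train_transforms = transforms.Compose([
    transforms.RandomCrop(crop_size=224),
    transforms.Normalize()
])
train_dataset = pdx.datasets.ImageNet(
    data_dir='dataset',                      # placeholder directory
    file_list='dataset/train_list.txt',
    label_list='dataset/labels.txt',
    transforms=train_transforms)

model = pdx.cls.HRNet_W18(num_classes=len(train_dataset.labels))
model.train(
    num_epochs=10,
    train_dataset=train_dataset,
    train_batch_size=32,
    save_dir='output/hrnet_w18')
```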
......@@ -32,7 +32,7 @@ class FasterRCNN(BaseAPI):
Args:
num_classes (int): Number of classes, including the background class. Defaults to 81.
backbone (str): Backbone network for FasterRCNN, one of ['ResNet18', 'ResNet50',
'ResNet50_vd', 'ResNet101', 'ResNet101_vd']. Defaults to 'ResNet50'.
'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']. Defaults to 'ResNet50'.
with_fpn (bool): Whether to use the FPN structure. Defaults to True.
aspect_ratios (list): Candidate aspect ratios for generated anchors. Defaults to [0.5, 1.0, 2.0].
anchor_sizes (list): Candidate sizes for generated anchors. Defaults to [32, 64, 128, 256, 512].
......@@ -47,7 +47,8 @@ class FasterRCNN(BaseAPI):
self.init_params = locals()
super(FasterRCNN, self).__init__('detector')
backbones = [
'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd'
'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd',
'HRNet_W18'
]
assert backbone in backbones, "backbone should be one of {}".format(
backbones)
......@@ -79,6 +80,12 @@ class FasterRCNN(BaseAPI):
layers = 101
variant = 'd'
norm_type = 'affine_channel'
elif backbone_name == 'HRNet_W18':
backbone = paddlex.cv.nets.hrnet.HRNet(
width=18, freeze_norm=True, norm_decay=0., freeze_at=0)
if self.with_fpn is False:
self.with_fpn = True
return backbone
if self.with_fpn:
backbone = paddlex.cv.nets.resnet.ResNet(
norm_type='bn' if norm_type is None else norm_type,
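The new branch above wires an HRNet backbone (width 18, frozen norm) into FasterRCNN and silently turns FPN on, since HRNet feature maps are consumed through an FPN-style neck. A hedged construction example (num_classes includes the background class):

```python
import paddlex as pdx

# 'HRNet_W18' is now an accepted backbone string; with_fpn is forced to True
# internally for this backbone even if the caller passes False.
model = pdx.det.FasterRCNN(num_classes=81, backbone='HRNet_W18')
```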
......@@ -117,12 +124,12 @@ class FasterRCNN(BaseAPI):
model_out = model.build_net(inputs)
loss = model_out['loss']
self.optimizer.minimize(loss)
outputs = OrderedDict([('loss', model_out['loss']),
('loss_cls', model_out['loss_cls']),
('loss_bbox', model_out['loss_bbox']),
('loss_rpn_cls', model_out['loss_rpn_cls']),
('loss_rpn_bbox',
model_out['loss_rpn_bbox'])])
outputs = OrderedDict(
[('loss', model_out['loss']),
('loss_cls', model_out['loss_cls']),
('loss_bbox', model_out['loss_bbox']),
('loss_rpn_cls', model_out['loss_rpn_cls']), (
'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
else:
outputs = model.build_net(inputs)
return inputs, outputs
......@@ -227,7 +234,9 @@ class FasterRCNN(BaseAPI):
# Build the training, evaluation and test networks
self.build_program()
fuse_bn = True
if self.with_fpn and self.backbone in ['ResNet18', 'ResNet50']:
if self.with_fpn and self.backbone in [
'ResNet18', 'ResNet50', 'HRNet_W18'
]:
fuse_bn = False
self.net_initialize(
startup_prog=fluid.default_startup_program(),
......@@ -310,11 +319,10 @@ class FasterRCNN(BaseAPI):
'im_info': im_infos,
'im_shape': im_shapes,
}
outputs = self.exe.run(
self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
outputs = self.exe.run(self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
res = {
'bbox': (np.array(outputs[0]),
outputs[0].recursive_sequence_lengths())
......@@ -339,13 +347,13 @@ class FasterRCNN(BaseAPI):
res['is_difficult'] = (np.array(res_is_difficult),
[res_is_difficult_lod])
results.append(res)
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
epoch_id, step + 1, total_steps))
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
1, total_steps))
box_ap_stats, eval_details = eval_results(
results, metric, eval_dataset.coco_gt, with_background=True)
metrics = OrderedDict(
zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
box_ap_stats))
zip(['bbox_mmap'
if metric == 'COCO' else 'bbox_map'], box_ap_stats))
if return_details:
return metrics, eval_details
return metrics
......@@ -373,15 +381,14 @@ class FasterRCNN(BaseAPI):
im = np.expand_dims(im, axis=0)
im_resize_info = np.expand_dims(im_resize_info, axis=0)
im_shape = np.expand_dims(im_shape, axis=0)
outputs = self.exe.run(
self.test_prog,
feed={
'image': im,
'im_info': im_resize_info,
'im_shape': im_shape
},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
outputs = self.exe.run(self.test_prog,
feed={
'image': im,
'im_info': im_resize_info,
'im_shape': im_shape
},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), outputs)
......
......@@ -32,7 +32,7 @@ class MaskRCNN(FasterRCNN):
Args:
num_classes (int): Number of classes, including the background class. Defaults to 81.
backbone (str): Backbone network for MaskRCNN, one of ['ResNet18', 'ResNet50',
'ResNet50_vd', 'ResNet101', 'ResNet101_vd']. Defaults to 'ResNet50'.
'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']. Defaults to 'ResNet50'.
with_fpn (bool): Whether to use the FPN structure. Defaults to True.
aspect_ratios (list): Candidate aspect ratios for generated anchors. Defaults to [0.5, 1.0, 2.0].
anchor_sizes (list): Candidate sizes for generated anchors. Defaults to [32, 64, 128, 256, 512].
......@@ -46,7 +46,8 @@ class MaskRCNN(FasterRCNN):
anchor_sizes=[32, 64, 128, 256, 512]):
self.init_params = locals()
backbones = [
'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd'
'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd',
'HRNet_W18'
]
assert backbone in backbones, "backbone should be one of {}".format(
backbones)
......@@ -81,13 +82,13 @@ class MaskRCNN(FasterRCNN):
model_out = model.build_net(inputs)
loss = model_out['loss']
self.optimizer.minimize(loss)
outputs = OrderedDict([('loss', model_out['loss']),
('loss_cls', model_out['loss_cls']),
('loss_bbox', model_out['loss_bbox']),
('loss_mask', model_out['loss_mask']),
('loss_rpn_cls', model_out['loss_rpn_cls']),
('loss_rpn_bbox',
model_out['loss_rpn_bbox'])])
outputs = OrderedDict(
[('loss', model_out['loss']),
('loss_cls', model_out['loss_cls']),
('loss_bbox', model_out['loss_bbox']),
('loss_mask', model_out['loss_mask']),
('loss_rpn_cls', model_out['loss_rpn_cls']), (
'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
else:
outputs = model.build_net(inputs)
return inputs, outputs
......@@ -276,11 +277,10 @@ class MaskRCNN(FasterRCNN):
'im_info': im_infos,
'im_shape': im_shapes,
}
outputs = self.exe.run(
self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
outputs = self.exe.run(self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
res = {
'bbox': (np.array(outputs[0]),
outputs[0].recursive_sequence_lengths()),
......@@ -292,8 +292,8 @@ class MaskRCNN(FasterRCNN):
res['im_shape'] = (im_shapes, [])
res['im_id'] = (np.array(res_im_id), [])
results.append(res)
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
epoch_id, step + 1, total_steps))
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
1, total_steps))
ap_stats, eval_details = eval_results(
results,
......@@ -302,8 +302,8 @@ class MaskRCNN(FasterRCNN):
with_background=True,
resolution=self.mask_head_resolution)
if metric == 'VOC':
if isinstance(ap_stats[0], np.ndarray) and isinstance(
ap_stats[1], np.ndarray):
if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
np.ndarray):
metrics = OrderedDict(
zip(['bbox_map', 'segm_map'],
[ap_stats[0][1], ap_stats[1][1]]))
......@@ -311,8 +311,8 @@ class MaskRCNN(FasterRCNN):
metrics = OrderedDict(
zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
elif metric == 'COCO':
if isinstance(ap_stats[0], np.ndarray) and isinstance(
ap_stats[1], np.ndarray):
if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
np.ndarray):
metrics = OrderedDict(
zip(['bbox_mmap', 'segm_mmap'],
[ap_stats[0][0], ap_stats[1][0]]))
......@@ -346,15 +346,14 @@ class MaskRCNN(FasterRCNN):
im = np.expand_dims(im, axis=0)
im_resize_info = np.expand_dims(im_resize_info, axis=0)
im_shape = np.expand_dims(im_shape, axis=0)
outputs = self.exe.run(
self.test_prog,
feed={
'image': im,
'im_info': im_resize_info,
'im_shape': im_shape
},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
outputs = self.exe.run(self.test_prog,
feed={
'image': im,
'im_info': im_resize_info,
'im_shape': im_shape
},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), outputs)
......@@ -368,8 +367,8 @@ class MaskRCNN(FasterRCNN):
import pycocotools.mask as mask_util
for index, xywh_res in enumerate(xywh_results):
del xywh_res['image_id']
xywh_res['mask'] = mask_util.decode(
segm_results[index]['segmentation'])
xywh_res['mask'] = mask_util.decode(segm_results[index][
'segmentation'])
xywh_res['category'] = self.labels[xywh_res['category_id']]
results.append(xywh_res)
return results
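The prediction post-processing above converts each RLE-encoded segmentation back to a binary mask with pycocotools. A self-contained round-trip illustration (the mask here is hand-built, not a model output):

```python
import numpy as np
import pycocotools.mask as mask_util

binary_mask = np.zeros((4, 4), dtype=np.uint8, order='F')
binary_mask[1:3, 1:3] = 1
rle = mask_util.encode(binary_mask)   # dict with 'size' and compressed 'counts'
decoded = mask_util.decode(rle)       # back to a (4, 4) uint8 array
assert (decoded == binary_mask).all()
```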
......@@ -56,6 +56,8 @@ image_pretrain = {
'https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar',
'ShuffleNetV2':
'https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar',
'HRNet_W18':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar',
}
coco_pretrain = {
......@@ -85,10 +87,11 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
backbone = 'DetResNet50'
assert backbone in image_pretrain, "There is not ImageNet pretrain weights for {}, you may try COCO.".format(
backbone)
# url = image_pretrain[backbone]
# fname = osp.split(url)[-1].split('.')[0]
# paddlex.utils.download_and_decompress(url, path=new_save_dir)
# return osp.join(new_save_dir, fname)
if backbone == "HRNet_W18":
url = image_pretrain[backbone]
fname = osp.split(url)[-1].split('.')[0]
paddlex.utils.download_and_decompress(url, path=new_save_dir)
return osp.join(new_save_dir, fname)
try:
hub.download(backbone, save_path=new_save_dir)
except Exception as e:
......
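The new HRNet_W18 branch above downloads the ImageNet weights directly instead of going through PaddleHub; the local directory name is derived from the archive URL. A quick illustration of that derivation (no download involved):

```python
from os import path as osp

url = ('https://paddle-imagenet-models-name.bj.bcebos.com/'
       'HRNet_W18_C_pretrained.tar')
fname = osp.split(url)[-1].split('.')[0]
print(fname)  # 'HRNet_W18_C_pretrained' -> joined with new_save_dir above
```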
......@@ -23,6 +23,7 @@ from .segmentation import DeepLabv3p
from .xception import Xception
from .densenet import DenseNet
from .shufflenet_v2 import ShuffleNetV2
from .hrnet import HRNet
def resnet18(input, num_classes=1000):
......@@ -51,14 +52,20 @@ def resnet50_vd(input, num_classes=1000):
def resnet50_vd_ssld(input, num_classes=1000):
model = ResNet(layers=50, num_classes=num_classes,
variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
model = ResNet(
layers=50,
num_classes=num_classes,
variant='d',
lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
return model(input)
def resnet101_vd_ssld(input, num_classes=1000):
model = ResNet(layers=101, num_classes=num_classes,
variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
model = ResNet(
layers=101,
num_classes=num_classes,
variant='d',
lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
return model(input)
......@@ -93,14 +100,18 @@ def mobilenetv3_large(input, num_classes=1000):
def mobilenetv3_small_ssld(input, num_classes=1000):
model = MobileNetV3(num_classes=num_classes, model_name='small',
lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
model = MobileNetV3(
num_classes=num_classes,
model_name='small',
lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
return model(input)
def mobilenetv3_large_ssld(input, num_classes=1000):
model = MobileNetV3(num_classes=num_classes, model_name='large',
lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
model = MobileNetV3(
num_classes=num_classes,
model_name='large',
lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
return model(input)
......@@ -133,6 +144,12 @@ def densenet201(input, num_classes=1000):
model = DenseNet(layers=201, num_classes=num_classes)
return model(input)
def shufflenetv2(input, num_classes=1000):
model = ShuffleNetV2(num_classes=num_classes)
return model(input)
def hrnet_w18(input, num_classes=1000):
model = HRNet(width=18, num_classes=num_classes)
return model(input)
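The factory functions in this module are looked up by lower-cased model name from BaseClassifier, so adding hrnet_w18 here is what makes the 'HRNet_W18' string valid. A small sketch of that dispatch (static-graph fluid; the input shape is an assumption):

```python
import paddle.fluid as fluid
import paddlex

# BaseClassifier rejects names without a matching factory in paddlex.cv.nets.
assert hasattr(paddlex.cv.nets, 'HRNet_W18'.lower())

image = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32')
logits = paddlex.cv.nets.hrnet_w18(image, num_classes=1000)
```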
......@@ -21,7 +21,7 @@ import copy
from paddle import fluid
from .fpn import FPN
from .fpn import (FPN, HRFPN)
from .rpn_head import (RPNHead, FPNRPNHead)
from .roi_extractor import (RoIAlign, FPNRoIAlign)
from .bbox_head import (BBoxHead, TwoFCHead)
......@@ -82,7 +82,12 @@ class FasterRCNN(object):
self.backbone = backbone
self.mode = mode
if with_fpn and fpn is None:
fpn = FPN()
if self.backbone.__class__.__name__.startswith('HRNet'):
fpn = HRFPN()
fpn.min_level = 2
fpn.max_level = 6
else:
fpn = FPN()
self.fpn = fpn
self.num_classes = num_classes
if rpn_head is None:
......
......@@ -23,7 +23,7 @@ from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Xavier
from paddle.fluid.regularizer import L2Decay
__all__ = ['FPN']
__all__ = ['FPN', 'HRFPN']
def ConvNorm(input,
......@@ -219,8 +219,8 @@ class FPN(object):
body_name = body_name_list[i]
body_input = body_dict[body_name]
top_output = self.fpn_inner_output[i - 1]
fpn_inner_single = self._add_topdown_lateral(
body_name, body_input, top_output)
fpn_inner_single = self._add_topdown_lateral(body_name, body_input,
top_output)
self.fpn_inner_output[i] = fpn_inner_single
fpn_dict = {}
fpn_name_list = []
......@@ -293,3 +293,107 @@ class FPN(object):
spatial_scale.insert(0, spatial_scale[0] * 0.5)
res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
return res_dict, spatial_scale
class HRFPN(object):
"""
HRFPN, the FPN variant paired with HRNet backbones, see https://arxiv.org/abs/1908.07919
Args:
num_chan (int): number of feature channels
pooling_type (str): pooling type of downsampling
share_conv (bool): whether to share conv for different layers' reduction
spatial_scale (list): feature map scaling factor
"""
def __init__(
self,
num_chan=256,
pooling_type="avg",
share_conv=False,
spatial_scale=[1. / 64, 1. / 32, 1. / 16, 1. / 8, 1. / 4], ):
self.num_chan = num_chan
self.pooling_type = pooling_type
self.share_conv = share_conv
self.spatial_scale = spatial_scale
def get_output(self, body_dict):
num_out = len(self.spatial_scale)
body_name_list = list(body_dict.keys())
num_backbone_stages = len(body_name_list)
outs = []
outs.append(body_dict[body_name_list[0]])
# resize
for i in range(1, len(body_dict)):
resized = self.resize_input_tensor(body_dict[body_name_list[i]],
outs[0], 2**i)
outs.append(resized)
# concat
out = fluid.layers.concat(outs, axis=1)
# reduction
out = fluid.layers.conv2d(
input=out,
num_filters=self.num_chan,
filter_size=1,
stride=1,
padding=0,
param_attr=ParamAttr(name='hrfpn_reduction_weights'),
bias_attr=False)
# conv
outs = [out]
for i in range(1, num_out):
outs.append(
self.pooling(
out,
size=2**i,
stride=2**i,
pooling_type=self.pooling_type))
outputs = []
for i in range(num_out):
conv_name = "shared_fpn_conv" if self.share_conv else "shared_fpn_conv_" + str(
i)
conv = fluid.layers.conv2d(
input=outs[i],
num_filters=self.num_chan,
filter_size=3,
stride=1,
padding=1,
param_attr=ParamAttr(name=conv_name + "_weights"),
bias_attr=False)
outputs.append(conv)
for idx in range(0, num_out - len(body_name_list)):
body_name_list.append("fpn_res5_sum_subsampled_{}x".format(2**(
idx + 1)))
outputs = outputs[::-1]
body_name_list = body_name_list[::-1]
res_dict = OrderedDict([(body_name_list[k], outputs[k])
for k in range(len(body_name_list))])
return res_dict, self.spatial_scale
def resize_input_tensor(self, body_input, ref_output, scale):
shape = fluid.layers.shape(ref_output)
shape_hw = fluid.layers.slice(shape, axes=[0], starts=[2], ends=[4])
out_shape_ = shape_hw
out_shape = fluid.layers.cast(out_shape_, dtype='int32')
out_shape.stop_gradient = True
body_output = fluid.layers.resize_bilinear(
body_input, scale=scale, out_shape=out_shape)
return body_output
def pooling(self, input, size, stride, pooling_type):
pool = fluid.layers.pool2d(
input=input,
pool_size=size,
pool_stride=stride,
pool_type=pooling_type)
return pool
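To make the data flow of HRFPN concrete, here is a hedged sketch that feeds it four dummy branch outputs with HRNet_W18 channel widths (18/36/72/144); the module path and input shapes are assumptions:

```python
from collections import OrderedDict
import paddle.fluid as fluid
from paddlex.cv.nets.detection.fpn import HRFPN  # assumed module path

body_dict = OrderedDict()
for i, ch in enumerate([18, 36, 72, 144]):
    name = 'hrnet_branch{}'.format(i)
    body_dict[name] = fluid.data(
        name=name, shape=[None, ch, 56 // 2**i, 56 // 2**i], dtype='float32')

fpn = HRFPN(num_chan=256)
res_dict, spatial_scale = fpn.get_output(body_dict)
# res_dict: five pyramid levels, each reduced to 256 channels;
# spatial_scale defaults to [1/64, 1/32, 1/16, 1/8, 1/4]
```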
......@@ -21,7 +21,7 @@ import copy
import paddle.fluid as fluid
from .fpn import FPN
from .fpn import (FPN, HRFPN)
from .rpn_head import (RPNHead, FPNRPNHead)
from .roi_extractor import (RoIAlign, FPNRoIAlign)
from .bbox_head import (BBoxHead, TwoFCHead)
......@@ -92,11 +92,15 @@ class MaskRCNN(object):
self.backbone = backbone
self.mode = mode
if with_fpn and fpn is None:
fpn = FPN(
num_chan=num_chan,
min_level=min_level,
max_level=max_level,
spatial_scale=spatial_scale)
if self.backbone.__class__.__name__.startswith('HRNet'):
fpn = HRFPN()
fpn.min_level = 2
fpn.max_level = 6
else:
fpn = FPN(num_chan=num_chan,
min_level=min_level,
max_level=max_level,
spatial_scale=spatial_scale)
self.fpn = fpn
self.num_classes = num_classes
if rpn_head is None:
......
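The same neck substitution is made for Mask R-CNN: when the backbone class name starts with 'HRNet', HRFPN replaces the standard FPN. From the user-facing API the only visible difference is the backbone string (a hedged sketch):

```python
import paddlex as pdx

# Instance segmentation with the new backbone; num_classes includes background.
model = pdx.det.MaskRCNN(num_classes=81, backbone='HRNet_W18')
```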