未验证 提交 2cdafa10 编写于 作者: D dyning 提交者: GitHub

Merge pull request #1036 from tink2123/add_anno

Add anno for head
......@@ -23,6 +23,14 @@ import paddle.fluid as fluid
class ClsHead(object):
"""
Class orientation
Args:
params(dict): hyperparameters for building the Class network
"""
def __init__(self, params):
super(ClsHead, self).__init__()
self.class_dim = params['class_dim']
......
......@@ -109,6 +109,12 @@ class EASTHead(object):
return f_score, f_geo
def __call__(self, inputs):
"""
Fuse different levels of feature map from backbone and predict results
Args:
inputs(list): feature maps from backbone
Return: predicts
"""
f_common = self.unet_fusion(inputs)
f_score, f_geo = self.detector_header(f_common)
predicts = OrderedDict()
......
......@@ -38,35 +38,66 @@ class SASTHead(object):
blocks{}: contain block_2, block_3, block_4, block_5, block_6, block_7 with
1/4, 1/8, 1/16, 1/32, 1/64, 1/128 resolution.
"""
f = [blocks['block_6'], blocks['block_5'], blocks['block_4'], blocks['block_3'], blocks['block_2']]
f = [
blocks['block_6'], blocks['block_5'], blocks['block_4'],
blocks['block_3'], blocks['block_2']
]
num_outputs = [256, 256, 192, 192, 128]
g = [None, None, None, None, None]
h = [None, None, None, None, None]
h = [None, None, None, None, None]
for i in range(5):
h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i],
filter_size=1, stride=1, act=None, name='fpn_up_h'+str(i))
h[i] = conv_bn_layer(
input=f[i],
num_filters=num_outputs[i],
filter_size=1,
stride=1,
act=None,
name='fpn_up_h' + str(i))
for i in range(4):
if i == 0:
g[i] = deconv_bn_layer(input=h[i], num_filters=num_outputs[i + 1], act=None, name='fpn_up_g0')
g[i] = deconv_bn_layer(
input=h[i],
num_filters=num_outputs[i + 1],
act=None,
name='fpn_up_g0')
#print("g[{}] shape: {}".format(i, g[i].shape))
else:
g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
g[i] = fluid.layers.relu(g[i])
#g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i],
# filter_size=1, stride=1, act='relu')
g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i],
filter_size=3, stride=1, act='relu', name='fpn_up_g%d_1'%i)
g[i] = deconv_bn_layer(input=g[i], num_filters=num_outputs[i + 1], act=None, name='fpn_up_g%d_2'%i)
g[i] = conv_bn_layer(
input=g[i],
num_filters=num_outputs[i],
filter_size=3,
stride=1,
act='relu',
name='fpn_up_g%d_1' % i)
g[i] = deconv_bn_layer(
input=g[i],
num_filters=num_outputs[i + 1],
act=None,
name='fpn_up_g%d_2' % i)
#print("g[{}] shape: {}".format(i, g[i].shape))
g[4] = fluid.layers.elementwise_add(x=g[3], y=h[4])
g[4] = fluid.layers.relu(g[4])
g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4],
filter_size=3, stride=1, act='relu', name='fpn_up_fusion_1')
g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4],
filter_size=1, stride=1, act=None, name='fpn_up_fusion_2')
g[4] = conv_bn_layer(
input=g[4],
num_filters=num_outputs[4],
filter_size=3,
stride=1,
act='relu',
name='fpn_up_fusion_1')
g[4] = conv_bn_layer(
input=g[4],
num_filters=num_outputs[4],
filter_size=1,
stride=1,
act=None,
name='fpn_up_fusion_2')
return g[4]
def FPN_Down_Fusion(self, blocks):
......@@ -77,95 +108,245 @@ class SASTHead(object):
f = [blocks['block_0'], blocks['block_1'], blocks['block_2']]
num_outputs = [32, 64, 128]
g = [None, None, None]
h = [None, None, None]
h = [None, None, None]
for i in range(3):
h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i],
filter_size=3, stride=1, act=None, name='fpn_down_h'+str(i))
h[i] = conv_bn_layer(
input=f[i],
num_filters=num_outputs[i],
filter_size=3,
stride=1,
act=None,
name='fpn_down_h' + str(i))
for i in range(2):
if i == 0:
g[i] = conv_bn_layer(input=h[i], num_filters=num_outputs[i+1], filter_size=3, stride=2, act=None, name='fpn_down_g0')
g[i] = conv_bn_layer(
input=h[i],
num_filters=num_outputs[i + 1],
filter_size=3,
stride=2,
act=None,
name='fpn_down_g0')
else:
g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
g[i] = fluid.layers.relu(g[i])
g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i], filter_size=3, stride=1, act='relu', name='fpn_down_g%d_1'%i)
g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i+1], filter_size=3, stride=2, act=None, name='fpn_down_g%d_2'%i)
g[i] = conv_bn_layer(
input=g[i],
num_filters=num_outputs[i],
filter_size=3,
stride=1,
act='relu',
name='fpn_down_g%d_1' % i)
g[i] = conv_bn_layer(
input=g[i],
num_filters=num_outputs[i + 1],
filter_size=3,
stride=2,
act=None,
name='fpn_down_g%d_2' % i)
# print("g[{}] shape: {}".format(i, g[i].shape))
g[2] = fluid.layers.elementwise_add(x=g[1], y=h[2])
g[2] = fluid.layers.relu(g[2])
g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2],
filter_size=3, stride=1, act='relu', name='fpn_down_fusion_1')
g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2],
filter_size=1, stride=1, act=None, name='fpn_down_fusion_2')
g[2] = conv_bn_layer(
input=g[2],
num_filters=num_outputs[2],
filter_size=3,
stride=1,
act='relu',
name='fpn_down_fusion_1')
g[2] = conv_bn_layer(
input=g[2],
num_filters=num_outputs[2],
filter_size=1,
stride=1,
act=None,
name='fpn_down_fusion_2')
return g[2]
def SAST_Header1(self, f_common):
    """
    Detector header: build the score and border prediction branches.
    Both branches start from the same feature map and use the same
    1x1 -> 3x3 -> 1x1 -> 3x3 convolution stack; only the number of
    filters of the last layer and the final activation differ.
    Args:
        f_common: fused feature map shared by both branches
    Return: tuple (f_score, f_border)
    """
    # f_score: the last conv has a single filter and is passed through sigmoid
    f_score = conv_bn_layer(
        input=f_common,
        num_filters=64,
        filter_size=1,
        stride=1,
        act='relu',
        name='f_score1')
    f_score = conv_bn_layer(
        input=f_score,
        num_filters=64,
        filter_size=3,
        stride=1,
        act='relu',
        name='f_score2')
    f_score = conv_bn_layer(
        input=f_score,
        num_filters=128,
        filter_size=1,
        stride=1,
        act='relu',
        name='f_score3')
    f_score = conv_bn_layer(
        input=f_score,
        num_filters=1,
        filter_size=3,
        stride=1,
        name='f_score4')
    f_score = fluid.layers.sigmoid(f_score)

    # f_border: the last conv has 4 filters and no explicit activation
    f_border = conv_bn_layer(
        input=f_common,
        num_filters=64,
        filter_size=1,
        stride=1,
        act='relu',
        name='f_border1')
    f_border = conv_bn_layer(
        input=f_border,
        num_filters=64,
        filter_size=3,
        stride=1,
        act='relu',
        name='f_border2')
    f_border = conv_bn_layer(
        input=f_border,
        num_filters=128,
        filter_size=1,
        stride=1,
        act='relu',
        name='f_border3')
    f_border = conv_bn_layer(
        input=f_border,
        num_filters=4,
        filter_size=3,
        stride=1,
        name='f_border4')
    return f_score, f_border
def SAST_Header2(self, f_common):
    """
    Detector header: build the tvo and tco prediction branches.
    Both branches start from the same feature map and use the same
    1x1 -> 3x3 -> 1x1 -> 3x3 convolution stack; only the number of
    filters of the last layer differs (8 for f_tvo, 2 for f_tco).
    Args:
        f_common: fused feature map shared by both branches
    Return: tuple (f_tvo, f_tco)
    """
    # NOTE(review): tvo / tco presumably stand for "text vertex offset" and
    # "text center offset" as in the SAST paper -- confirm.
    # f_tvo: the last conv has 8 filters and no explicit activation
    f_tvo = conv_bn_layer(
        input=f_common,
        num_filters=64,
        filter_size=1,
        stride=1,
        act='relu',
        name='f_tvo1')
    f_tvo = conv_bn_layer(
        input=f_tvo,
        num_filters=64,
        filter_size=3,
        stride=1,
        act='relu',
        name='f_tvo2')
    f_tvo = conv_bn_layer(
        input=f_tvo,
        num_filters=128,
        filter_size=1,
        stride=1,
        act='relu',
        name='f_tvo3')
    f_tvo = conv_bn_layer(
        input=f_tvo, num_filters=8, filter_size=3, stride=1, name='f_tvo4')

    # f_tco: the last conv has 2 filters and no explicit activation
    f_tco = conv_bn_layer(
        input=f_common,
        num_filters=64,
        filter_size=1,
        stride=1,
        act='relu',
        name='f_tco1')
    f_tco = conv_bn_layer(
        input=f_tco,
        num_filters=64,
        filter_size=3,
        stride=1,
        act='relu',
        name='f_tco2')
    f_tco = conv_bn_layer(
        input=f_tco,
        num_filters=128,
        filter_size=1,
        stride=1,
        act='relu',
        name='f_tco3')
    f_tco = conv_bn_layer(
        input=f_tco, num_filters=2, filter_size=3, stride=1, name='f_tco4')
    return f_tvo, f_tco
def cross_attention(self, f_common):
"""
"""
f_shape = fluid.layers.shape(f_common)
f_theta = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_theta')
f_phi = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_phi')
f_g = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_g')
f_theta = conv_bn_layer(
input=f_common,
num_filters=128,
filter_size=1,
stride=1,
act='relu',
name='f_theta')
f_phi = conv_bn_layer(
input=f_common,
num_filters=128,
filter_size=1,
stride=1,
act='relu',
name='f_phi')
f_g = conv_bn_layer(
input=f_common,
num_filters=128,
filter_size=1,
stride=1,
act='relu',
name='f_g')
### horizon
fh_theta = f_theta
fh_phi = f_phi
fh_g = f_g
#flatten
fh_theta = fluid.layers.transpose(fh_theta, [0, 2, 3, 1])
fh_theta = fluid.layers.reshape(fh_theta, [f_shape[0] * f_shape[2], f_shape[3], 128])
fh_theta = fluid.layers.reshape(
fh_theta, [f_shape[0] * f_shape[2], f_shape[3], 128])
fh_phi = fluid.layers.transpose(fh_phi, [0, 2, 3, 1])
fh_phi = fluid.layers.reshape(fh_phi, [f_shape[0] * f_shape[2], f_shape[3], 128])
fh_phi = fluid.layers.reshape(
fh_phi, [f_shape[0] * f_shape[2], f_shape[3], 128])
fh_g = fluid.layers.transpose(fh_g, [0, 2, 3, 1])
fh_g = fluid.layers.reshape(fh_g, [f_shape[0] * f_shape[2], f_shape[3], 128])
fh_g = fluid.layers.reshape(fh_g,
[f_shape[0] * f_shape[2], f_shape[3], 128])
#correlation
fh_attn = fluid.layers.matmul(fh_theta, fluid.layers.transpose(fh_phi, [0, 2, 1]))
fh_attn = fluid.layers.matmul(fh_theta,
fluid.layers.transpose(fh_phi, [0, 2, 1]))
#scale
fh_attn = fh_attn / (128 ** 0.5)
fh_attn = fh_attn / (128**0.5)
fh_attn = fluid.layers.softmax(fh_attn)
#weighted sum
fh_weight = fluid.layers.matmul(fh_attn, fh_g)
fh_weight = fluid.layers.reshape(fh_weight, [f_shape[0], f_shape[2], f_shape[3], 128])
fh_weight = fluid.layers.reshape(
fh_weight, [f_shape[0], f_shape[2], f_shape[3], 128])
# print("fh_weight: {}".format(fh_weight.shape))
fh_weight = fluid.layers.transpose(fh_weight, [0, 3, 1, 2])
fh_weight = conv_bn_layer(input=fh_weight, num_filters=128, filter_size=1, stride=1, name='fh_weight')
fh_weight = conv_bn_layer(
input=fh_weight,
num_filters=128,
filter_size=1,
stride=1,
name='fh_weight')
#short cut
fh_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, name='fh_sc')
fh_sc = conv_bn_layer(
input=f_common,
num_filters=128,
filter_size=1,
stride=1,
name='fh_sc')
f_h = fluid.layers.relu(fh_weight + fh_sc)
######
#vertical
......@@ -174,31 +355,60 @@ class SASTHead(object):
fv_g = fluid.layers.transpose(f_g, [0, 1, 3, 2])
#flatten
fv_theta = fluid.layers.transpose(fv_theta, [0, 2, 3, 1])
fv_theta = fluid.layers.reshape(fv_theta, [f_shape[0] * f_shape[3], f_shape[2], 128])
fv_theta = fluid.layers.reshape(
fv_theta, [f_shape[0] * f_shape[3], f_shape[2], 128])
fv_phi = fluid.layers.transpose(fv_phi, [0, 2, 3, 1])
fv_phi = fluid.layers.reshape(fv_phi, [f_shape[0] * f_shape[3], f_shape[2], 128])
fv_phi = fluid.layers.reshape(
fv_phi, [f_shape[0] * f_shape[3], f_shape[2], 128])
fv_g = fluid.layers.transpose(fv_g, [0, 2, 3, 1])
fv_g = fluid.layers.reshape(fv_g, [f_shape[0] * f_shape[3], f_shape[2], 128])
fv_g = fluid.layers.reshape(fv_g,
[f_shape[0] * f_shape[3], f_shape[2], 128])
#correlation
fv_attn = fluid.layers.matmul(fv_theta, fluid.layers.transpose(fv_phi, [0, 2, 1]))
fv_attn = fluid.layers.matmul(fv_theta,
fluid.layers.transpose(fv_phi, [0, 2, 1]))
#scale
fv_attn = fv_attn / (128 ** 0.5)
fv_attn = fv_attn / (128**0.5)
fv_attn = fluid.layers.softmax(fv_attn)
#weighted sum
fv_weight = fluid.layers.matmul(fv_attn, fv_g)
fv_weight = fluid.layers.reshape(fv_weight, [f_shape[0], f_shape[3], f_shape[2], 128])
fv_weight = fluid.layers.reshape(
fv_weight, [f_shape[0], f_shape[3], f_shape[2], 128])
# print("fv_weight: {}".format(fv_weight.shape))
fv_weight = fluid.layers.transpose(fv_weight, [0, 3, 2, 1])
fv_weight = conv_bn_layer(input=fv_weight, num_filters=128, filter_size=1, stride=1, name='fv_weight')
fv_weight = conv_bn_layer(
input=fv_weight,
num_filters=128,
filter_size=1,
stride=1,
name='fv_weight')
#short cut
fv_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, name='fv_sc')
fv_sc = conv_bn_layer(
input=f_common,
num_filters=128,
filter_size=1,
stride=1,
name='fv_sc')
f_v = fluid.layers.relu(fv_weight + fv_sc)
######
f_attn = fluid.layers.concat([f_h, f_v], axis=1)
f_attn = conv_bn_layer(input=f_attn, num_filters=128, filter_size=1, stride=1, act='relu', name='f_attn')
f_attn = conv_bn_layer(
input=f_attn,
num_filters=128,
filter_size=1,
stride=1,
act='relu',
name='f_attn')
return f_attn
def __call__(self, blocks, with_cab=False):
"""
Fuse different levels of feature map from backbone and predict results
Args:
blocks(dict): feature maps from backbone, keyed by block name (e.g. 'block_2')
with_cab(bool): whether to use cross_attention
Return: predicts
"""
# for k, v in blocks.items():
# print(k, v.shape)
......@@ -212,12 +422,12 @@ class SASTHead(object):
f_common = fluid.layers.elementwise_add(x=f_down, y=f_up)
f_common = fluid.layers.relu(f_common)
# print("f_common: {}".format(f_common.shape))
if self.with_cab:
# print('enhence f_common with CAB.')
f_common = self.cross_attention(f_common)
f_score, f_border= self.SAST_Header1(f_common)
f_score, f_border = self.SAST_Header1(f_common)
f_tvo, f_tco = self.SAST_Header2(f_common)
predicts = OrderedDict()
......@@ -225,4 +435,4 @@ class SASTHead(object):
predicts['f_border'] = f_border
predicts['f_tvo'] = f_tvo
predicts['f_tco'] = f_tco
return predicts
\ No newline at end of file
return predicts
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册