# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import paddle
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
from paddle.fluid.initializer import ConstantInitializer


def spatial_pool(x, pooling_type, name):
    '''
    Squeeze the spatial dims of x into a [N, C, 1, 1] context vector, using
    either a learned attention map ('att') or global average pooling.
    '''
    _, channel, height, width = x.shape
    if pooling_type == 'att':
        input_x = x
        # [N, 1, C, H * W]
        input_x = fluid.layers.reshape(input_x, shape=(0, 1, channel, -1))
        # 1x1 conv produces a single-channel spatial attention logit map.
        context_mask = fluid.layers.conv2d(
            input=x,
            num_filters=1,
            filter_size=1,
            stride=1,
            padding=0,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=ParamAttr(name=name + "_bias"))
        # [N, 1, H * W]
        context_mask = fluid.layers.reshape(context_mask, shape=(0, 0, -1))
        # [N, 1, H * W], softmax over the spatial positions
        context_mask = fluid.layers.softmax(context_mask, axis=2)
        # [N, 1, H * W, 1]
        context_mask = fluid.layers.reshape(context_mask, shape=(0, 0, -1, 1))
        # [N, 1, C, 1], attention-weighted sum over spatial positions
        context = fluid.layers.matmul(input_x, context_mask)
        # [N, C, 1, 1]
        context = fluid.layers.reshape(context, shape=(0, channel, 1, 1))
    else:
        # [N, C, 1, 1]
        context = fluid.layers.pool2d(
            input=x, pool_type='avg', global_pooling=True)
    return context


def channel_conv(input, inner_ch, out_ch, name):
    '''
    Channel transform: a 1x1 conv bottleneck (out_ch -> inner_ch -> out_ch)
    with layer norm and ReLU in between.
    '''
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=inner_ch,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=name + "_conv1_weights"),
        bias_attr=ParamAttr(name=name + "_conv1_bias"),
        name=name + "_conv1")
    conv = fluid.layers.layer_norm(
        conv,
        begin_norm_axis=1,
        param_attr=ParamAttr(name=name + "_ln_weights"),
        bias_attr=ParamAttr(name=name + "_ln_bias"),
        act="relu",
        name=name + "_ln")
    # The second conv is zero-initialized so the GC block starts out as an
    # identity mapping and learns the context transform during training.
    conv = fluid.layers.conv2d(
        input=conv,
        num_filters=out_ch,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(
            name=name + "_conv2_weights",
            initializer=ConstantInitializer(value=0.0)),
        bias_attr=ParamAttr(
            name=name + "_conv2_bias",
            initializer=ConstantInitializer(value=0.0)),
        name=name + "_conv2")
    return conv


def add_gc_block(x,
                 ratio=1.0 / 16,
                 pooling_type='att',
                 fusion_types=['channel_add'],
                 name=None):
    '''
    GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond,
    see https://arxiv.org/abs/1904.11492

    Args:
        ratio (float): channel reduction ratio of the bottleneck transform
        pooling_type (str): pooling type, supports 'att' and 'avg'
        fusion_types (list): fusion types, supports 'channel_add' and
            'channel_mul'
        name (str): prefix for the parameter names of this gc block;
            must be provided
    '''
    assert pooling_type in ['avg', 'att']
    assert isinstance(fusion_types, (list, tuple))
    valid_fusion_types = ['channel_add', 'channel_mul']
    assert all([f in valid_fusion_types for f in fusion_types])
    assert len(fusion_types) > 0, 'at least one fusion should be used'

    inner_ch = int(ratio * x.shape[1])
    out_ch = x.shape[1]
    context = spatial_pool(x, pooling_type, name + "_spatial_pool")

    out = x
    if 'channel_mul' in fusion_types:
        inner_out = channel_conv(context, inner_ch, out_ch, name + "_mul")
        channel_mul_term = fluid.layers.sigmoid(inner_out)
        out = out * channel_mul_term
    if 'channel_add' in fusion_types:
        channel_add_term = channel_conv(context, inner_ch, out_ch,
                                        name + "_add")
        out = out + channel_add_term
    return out
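# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): a minimal
# static-graph example of attaching the GC block to a feature map, assuming
# the Paddle 1.x fluid API used above. The names 'feat' and 'gc_demo' are
# placeholders chosen for this demo.
if __name__ == '__main__':
    import numpy as np

    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        # An NCHW feature map, e.g. a backbone output with 64 channels.
        feat = fluid.data(
            name='feat', shape=[-1, 64, 32, 32], dtype='float32')
        # Attach a GC block with attention pooling and additive fusion; the
        # output keeps the input shape. Pass
        # fusion_types=['channel_add', 'channel_mul'] to enable both paths.
        out = add_gc_block(
            feat,
            ratio=1.0 / 16,
            pooling_type='att',
            fusion_types=['channel_add'],
            name='gc_demo')

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_prog)
    x = np.random.rand(2, 64, 32, 32).astype('float32')
    out_np = exe.run(main_prog, feed={'feat': x}, fetch_list=[out])[0]
    print(out_np.shape)  # (2, 64, 32, 32)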