# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

import paddle.fluid as fluid
from paddle.nn import initializer as I
from paddle.nn import Layer
from paddle.fluid.layers import utils
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.param_attr import ParamAttr
from paddle import _legacy_C_ops

__all__ = ['resnet_basic_block', 'ResNetBasicBlock']


def resnet_basic_block(x,
                       filter1,
                       scale1,
                       bias1,
                       mean1,
                       var1,
                       filter2,
                       scale2,
                       bias2,
                       mean2,
                       var2,
                       filter3,
                       scale3,
                       bias3,
                       mean3,
                       var3,
                       stride1,
                       stride2,
                       stride3,
                       padding1,
                       padding2,
                       padding3,
                       dilation1,
                       dilation2,
                       dilation3,
                       groups,
                       momentum,
                       eps,
                       data_format,
                       has_shortcut,
                       use_global_stats=None,
                       training=False,
                       trainable_statistics=False,
                       find_conv_max=True):
    """Functional form of the fused resnet_basic_block op. See
    :class:`ResNetBasicBlock` for the meaning of the arguments."""
    if fluid.framework._non_static_mode():
        # Dynamic graph: call the fused op directly. The running mean/var
        # tensors are passed both as inputs and as in-place outputs.
        attrs = ('stride1', stride1, 'stride2', stride2, 'stride3', stride3,
                 'padding1', padding1, 'padding2', padding2, 'padding3',
                 padding3, 'dilation1', dilation1, 'dilation2', dilation2,
                 'dilation3', dilation3, 'group', groups, 'momentum', momentum,
                 'epsilon', eps, 'data_format', data_format, 'has_shortcut',
                 has_shortcut, 'use_global_stats', use_global_stats,
                 'trainable_statistics', trainable_statistics, 'is_test',
                 not training, 'act_type', "relu", 'find_conv_input_max',
                 find_conv_max)
        out, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ = \
            _legacy_C_ops.resnet_basic_block(
                x, filter1, scale1, bias1, mean1, var1, filter2, scale2, bias2,
                mean2, var2, filter3, scale3, bias3, mean3, var3, mean1, var1,
                mean2, var2, mean3, var3, *attrs)
        return out

    # Static graph: create the output variables and append the op manually.
    helper = LayerHelper('resnet_basic_block', **locals())
    bn_param_dtype = fluid.core.VarDesc.VarType.FP32
    max_dtype = fluid.core.VarDesc.VarType.FP32

    out = helper.create_variable_for_type_inference(dtype=x.dtype,
                                                    stop_gradient=True)
    conv1 = helper.create_variable_for_type_inference(dtype=x.dtype,
                                                      stop_gradient=True)
    saved_mean1 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True)
    saved_invstd1 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True)
    running_mean1 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True) if mean1 is None else mean1
    running_var1 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True) if var1 is None else var1
    conv2 = helper.create_variable_for_type_inference(dtype=x.dtype,
                                                      stop_gradient=True)
    conv2_input = helper.create_variable_for_type_inference(
        dtype=x.dtype, stop_gradient=True)
    saved_mean2 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True)
    saved_invstd2 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True)
    running_mean2 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True) if mean2 is None else mean2
    running_var2 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True) if var2 is None else var2
    conv3 = helper.create_variable_for_type_inference(dtype=x.dtype,
                                                      stop_gradient=True)
    saved_mean3 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True)
    saved_invstd3 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True)
    running_mean3 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True) if mean3 is None else mean3
    running_var3 = helper.create_variable_for_type_inference(
        dtype=bn_param_dtype, stop_gradient=True) if var3 is None else var3
    conv1_input_max = helper.create_variable_for_type_inference(
        dtype=max_dtype, stop_gradient=True)
    conv1_filter_max = helper.create_variable_for_type_inference(
        dtype=max_dtype, stop_gradient=True)
    conv2_input_max = helper.create_variable_for_type_inference(
        dtype=max_dtype, stop_gradient=True)
    conv2_filter_max = helper.create_variable_for_type_inference(
        dtype=max_dtype, stop_gradient=True)
    conv3_input_max = helper.create_variable_for_type_inference(
        dtype=max_dtype, stop_gradient=True)
    conv3_filter_max = helper.create_variable_for_type_inference(
        dtype=max_dtype, stop_gradient=True)

    inputs = {
        'X': x,
        'Filter1': filter1,
        'Scale1': scale1,
        'Bias1': bias1,
        'Mean1': mean1,
        'Var1': var1,
        'Filter2': filter2,
        'Scale2': scale2,
        'Bias2': bias2,
        'Mean2': mean2,
        'Var2': var2,
        'Filter3': filter3,
        'Scale3': scale3,
        'Bias3': bias3,
        'Mean3': mean3,
        'Var3': var3,
    }

    attrs = {
        'stride1': stride1,
        'stride2': stride2,
        'stride3': stride3,
        'padding1': padding1,
        'padding2': padding2,
        'padding3': padding3,
        'dilation1': dilation1,
        'dilation2': dilation2,
        'dilation3': dilation3,
        'group': groups,
        'momentum': momentum,
        'epsilon': eps,
        'data_format': data_format,
        'has_shortcut': has_shortcut,
        'use_global_stats': use_global_stats,
        'trainable_statistics': trainable_statistics,
        'is_test': not training,
        'act_type': "relu",
        'find_conv_input_max': find_conv_max
    }

    outputs = {
        'Y': out,
        'Conv1': conv1,
        'SavedMean1': saved_mean1,
        'SavedInvstd1': saved_invstd1,
        'Mean1Out': running_mean1,
        'Var1Out': running_var1,
        'Conv2': conv2,
        'SavedMean2': saved_mean2,
        'SavedInvstd2': saved_invstd2,
        'Mean2Out': running_mean2,
        'Var2Out': running_var2,
        'Conv2Input': conv2_input,
        'Conv3': conv3,
        'SavedMean3': saved_mean3,
        'SavedInvstd3': saved_invstd3,
        'Mean3Out': running_mean3,
        'Var3Out': running_var3,
        'MaxInput1': conv1_input_max,
        'MaxFilter1': conv1_filter_max,
        'MaxInput2': conv2_input_max,
        'MaxFilter2': conv2_filter_max,
        'MaxInput3': conv3_input_max,
        'MaxFilter3': conv3_filter_max,
    }
    helper.append_op(type='resnet_basic_block',
                     inputs=inputs,
                     outputs=outputs,
                     attrs=attrs)
    return out
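

# A minimal dygraph sketch of calling the functional form above directly.
# It is illustrative only and assumes an XPU build of Paddle, since the fused
# resnet_basic_block op is XPU-specific. Parameter shapes follow the
# conventions used by ResNetBasicBlock below: filters are
# [out_ch, in_ch, kh, kw] and batch-norm parameters are [1, 1, out_ch].
#
#     import paddle
#     from paddle.incubate.xpu.resnet_block import resnet_basic_block
#
#     ch = 8
#     x = paddle.uniform((2, ch, 16, 16), dtype='float32')
#     filt = paddle.create_parameter([ch, ch, 3, 3], 'float32')
#     scale = paddle.create_parameter([1, 1, ch], 'float32')
#     bias = paddle.create_parameter([1, 1, ch], 'float32', is_bias=True)
#     mean = paddle.zeros([1, 1, ch], dtype='float32')
#     var = paddle.ones([1, 1, ch], dtype='float32')
#     out = resnet_basic_block(
#         x,
#         filt, scale, bias, mean, var,  # conv1 + bn1
#         filt, scale, bias, mean, var,  # conv2 + bn2 (same params, for brevity)
#         None, None, None, None, None,  # conv3 + bn3 (unused: has_shortcut=False)
#         stride1=1, stride2=1, stride3=1,
#         padding1=1, padding2=1, padding3=0,
#         dilation1=1, dilation2=1, dilation3=1,
#         groups=1, momentum=0.9, eps=1e-5,
#         data_format='NCHW', has_shortcut=False,
#         training=True)
#     # out.shape == [2, 8, 16, 16]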


class ResNetBasicBlock(Layer):
    r"""
    ResNetBasicBlock is designed to optimize the performance of the basic unit
    of an SSD ResNet block by fusing it into a single op. If
    has_shortcut = True, it computes 3 Conv2D, 3 BatchNorm and 2 ReLU in one
    pass. If has_shortcut = False, it computes 2 Conv2D, 2 BatchNorm and
    2 ReLU in one pass; in that case the output has the same shape as the
    input.

    Args:
        num_channels (int): The number of channels of the input image.
        num_filter (int): The number of filters. This equals the number of
            channels of the output image.
        filter_size (int|list|tuple): The filter size. If filter_size is a
            tuple, it must contain two integers, (filter_size_height,
            filter_size_width). Otherwise,
            filter_size_height = filter_size_width = filter_size.
        stride (int, optional): The stride of the convolution. If stride is a
            tuple, it must contain two integers, (stride_height,
            stride_width). Otherwise, stride_height = stride_width = stride.
            Default: stride = 1.
        act (str, optional): Activation type. If it is set to None, no
            activation is appended. Default: None.
        momentum (float, optional): The value used for the moving_mean and
            moving_var computation. This should be a float number or a Tensor
            with shape [1] and data type float32. The update formulas are:

            :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)`

            :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`

            Default is 0.9.
        eps (float, optional): A value added to the denominator for numerical
            stability. Default is 1e-5.
        data_format (str, optional): Specify the data format of the input; the
            data format of the output is consistent with that of the input.
            Currently only `"NCHW"` is supported: the data is stored in the
            order of `[batch_size, input_channels, input_height, input_width]`.
        has_shortcut (bool, optional): Whether to calculate CONV3 and BN3.
            Default: False.
        use_global_stats (bool, optional): Whether to use the global mean and
            variance. In inference or test mode, setting use_global_stats to
            True is equivalent to setting is_test to True. In train mode, when
            use_global_stats is True, the global mean and variance are also
            used during training. Default: False.
        is_test (bool, optional): A flag indicating whether it is in the test
            phase or not. Default: False.
        filter_attr (ParamAttr|None): The parameter attribute for the
            learnable weights of conv2d. If it is set to None or one attribute
            of ParamAttr, conv2d will create ParamAttr as param_attr.
            Default: None.
        scale_attr (ParamAttr|None): The parameter attribute for the `scale`
            parameter of batch_norm. If it is set to None or one attribute of
            ParamAttr, batch_norm will create ParamAttr as param_attr; the
            name of the scale can be set in ParamAttr. If the Initializer of
            the param_attr is not set, the parameter is initialized with
            Xavier. Default: None.
        bias_attr (ParamAttr|None): The parameter attribute for the bias of
            batch_norm. If it is set to None or one attribute of ParamAttr,
            batch_norm will create ParamAttr as bias_attr; the name of the
            bias can be set in ParamAttr. If the Initializer of the bias_attr
            is not set, the bias is initialized to zero. Default: None.
        moving_mean_name (str, optional): The name of the moving_mean which
            stores the global mean. If it is set to None, batch_norm will save
            the global mean under a random name; otherwise, it is saved under
            the given name. Default: None.
        moving_var_name (str, optional): The name of the moving_variance which
            stores the global variance. If it is set to None, batch_norm will
            save the global variance under a random name; otherwise, it is
            saved under the given name. Default: None.
        padding (int, optional): The padding size. Only
            padding_height = padding_width = padding is supported.
            Default: padding = 0.
        dilation (int, optional): The dilation size, i.e. the spacing between
            the kernel points. Only dilation_height = dilation_width =
            dilation is supported. Default: dilation = 1.
        trainable_statistics (bool, optional): Whether to calculate the mean
            and variance in eval mode. In eval mode, when
            trainable_statistics is True, the mean and variance are calculated
            from the current batch statistics. Default: False.
        find_conv_max (bool, optional): Whether to calculate the max value of
            each conv2d input. Default: True.

    Returns:
        A Tensor representing the output of the ResNetBasicBlock, whose data
        type is the same as that of the input.

    Examples:
        ..
 code-block:: python

            # required: xpu
            import paddle
            from paddle.incubate.xpu.resnet_block import ResNetBasicBlock

            ch_in = 4
            ch_out = 8
            x = paddle.uniform((2, ch_in, 16, 16), dtype='float32', min=-1., max=1.)
            resnet_basic_block = ResNetBasicBlock(num_channels1=ch_in,
                                                  num_filter1=ch_out,
                                                  filter1_size=3,
                                                  num_channels2=ch_out,
                                                  num_filter2=ch_out,
                                                  filter2_size=3,
                                                  num_channels3=ch_in,
                                                  num_filter3=ch_out,
                                                  filter3_size=1,
                                                  stride1=1,
                                                  stride2=1,
                                                  stride3=1,
                                                  act='relu',
                                                  padding1=1,
                                                  padding2=1,
                                                  padding3=0,
                                                  has_shortcut=True)
            out = resnet_basic_block(x)
            print(out.shape)  # [2, 8, 16, 16]
    """

    def __init__(self,
                 num_channels1,
                 num_filter1,
                 filter1_size,
                 num_channels2,
                 num_filter2,
                 filter2_size,
                 num_channels3,
                 num_filter3,
                 filter3_size,
                 stride1=1,
                 stride2=1,
                 stride3=1,
                 act='relu',
                 momentum=0.9,
                 eps=1e-5,
                 data_format='NCHW',
                 has_shortcut=False,
                 use_global_stats=False,
                 is_test=False,
                 filter1_attr=None,
                 scale1_attr=None,
                 bias1_attr=None,
                 moving_mean1_name=None,
                 moving_var1_name=None,
                 filter2_attr=None,
                 scale2_attr=None,
                 bias2_attr=None,
                 moving_mean2_name=None,
                 moving_var2_name=None,
                 filter3_attr=None,
                 scale3_attr=None,
                 bias3_attr=None,
                 moving_mean3_name=None,
                 moving_var3_name=None,
                 padding1=0,
                 padding2=0,
                 padding3=0,
                 dilation1=1,
                 dilation2=1,
                 dilation3=1,
                 trainable_statistics=False,
                 find_conv_max=True):
        super(ResNetBasicBlock, self).__init__()
        self._stride1 = stride1
        self._stride2 = stride2
        self._kernel1_size = utils.convert_to_list(filter1_size, 2,
                                                   'filter1_size')
        self._kernel2_size = utils.convert_to_list(filter2_size, 2,
                                                   'filter2_size')
        self._dilation1 = dilation1
        self._dilation2 = dilation2
        self._padding1 = padding1
        self._padding2 = padding2
        self._groups = 1
        self._momentum = momentum
        self._eps = eps
        self._data_format = data_format
        self._act = act
        self._has_shortcut = has_shortcut
        self._use_global_stats = use_global_stats
        self._is_test = is_test
        self._trainable_statistics = trainable_statistics
        self._find_conv_max = find_conv_max

        if has_shortcut:
            self._kernel3_size = utils.convert_to_list(filter3_size, 2,
                                                       'filter3_size')
            self._padding3 = padding3
            self._stride3 = stride3
            self._dilation3 = dilation3
        else:
            self._kernel3_size = None
            self._padding3 = 1
            self._stride3 = 1
            self._dilation3 = 1

        # check format
        valid_format = {'NCHW'}
        if data_format not in valid_format:
            raise ValueError(
                "conv_format must be one of {}, but got conv_format={}".format(
                    valid_format, data_format))

        def _get_default_param_initializer(channels, kernel_size):
            # Kaiming-style normal initialization for the conv filters.
            filter_elem_num = np.prod(kernel_size) * channels
            std = (2.0 / filter_elem_num)**0.5
            return I.Normal(0.0, std)

        # init filter
        bn_param_dtype = fluid.core.VarDesc.VarType.FP32
        bn1_param_shape = [1, 1, num_filter1]
        bn2_param_shape = [1, 1, num_filter2]
        filter1_shape = [
            num_filter1, num_channels1, filter1_size, filter1_size
        ]
        filter2_shape = [
            num_filter2, num_channels2, filter2_size, filter2_size
        ]

        self.filter_1 = self.create_parameter(
            shape=filter1_shape,
            attr=filter1_attr,
            default_initializer=_get_default_param_initializer(
                num_channels1, self._kernel1_size))
        self.scale_1 = self.create_parameter(
            shape=bn1_param_shape,
            attr=scale1_attr,
            dtype=bn_param_dtype,
            default_initializer=I.Constant(1.0))
        self.bias_1 = self.create_parameter(shape=bn1_param_shape,
                                            attr=bias1_attr,
                                            dtype=bn_param_dtype,
                                            is_bias=True)
        self.mean_1 = self.create_parameter(
            attr=ParamAttr(name=moving_mean1_name,
                           initializer=I.Constant(0.0),
                           trainable=False),
            shape=bn1_param_shape,
            dtype=bn_param_dtype)
        self.mean_1.stop_gradient = True
        self.var_1 = self.create_parameter(
            attr=ParamAttr(name=moving_var1_name,
                           initializer=I.Constant(1.0),
                           trainable=False),
            shape=bn1_param_shape,
            dtype=bn_param_dtype)
        self.var_1.stop_gradient = True

        self.filter_2 = self.create_parameter(
            shape=filter2_shape,
            attr=filter2_attr,
            default_initializer=_get_default_param_initializer(
                num_channels2, self._kernel2_size))
        self.scale_2 = self.create_parameter(
            shape=bn2_param_shape,
            attr=scale2_attr,
            dtype=bn_param_dtype,
            default_initializer=I.Constant(1.0))
        self.bias_2 = self.create_parameter(shape=bn2_param_shape,
                                            attr=bias2_attr,
                                            dtype=bn_param_dtype,
                                            is_bias=True)
        self.mean_2 = self.create_parameter(
            attr=ParamAttr(name=moving_mean2_name,
                           initializer=I.Constant(0.0),
                           trainable=False),
            shape=bn2_param_shape,
            dtype=bn_param_dtype)
        self.mean_2.stop_gradient = True
        self.var_2 = self.create_parameter(
            attr=ParamAttr(name=moving_var2_name,
                           initializer=I.Constant(1.0),
                           trainable=False),
            shape=bn2_param_shape,
            dtype=bn_param_dtype)
        self.var_2.stop_gradient = True

        if has_shortcut:
            bn3_param_shape = [1, 1, num_filter3]
            filter3_shape = [
                num_filter3, num_channels3, filter3_size, filter3_size
            ]
            self.filter_3 = self.create_parameter(
                shape=filter3_shape,
                attr=filter3_attr,
                default_initializer=_get_default_param_initializer(
                    num_channels3, self._kernel3_size))
            self.scale_3 = self.create_parameter(
                shape=bn3_param_shape,
                attr=scale3_attr,
                dtype=bn_param_dtype,
                default_initializer=I.Constant(1.0))
            self.bias_3 = self.create_parameter(shape=bn3_param_shape,
                                                attr=bias3_attr,
                                                dtype=bn_param_dtype,
                                                is_bias=True)
            self.mean_3 = self.create_parameter(
                attr=ParamAttr(name=moving_mean3_name,
                               initializer=I.Constant(0.0),
                               trainable=False),
                shape=bn3_param_shape,
                dtype=bn_param_dtype)
            self.mean_3.stop_gradient = True
            self.var_3 = self.create_parameter(
                attr=ParamAttr(name=moving_var3_name,
                               initializer=I.Constant(1.0),
                               trainable=False),
                shape=bn3_param_shape,
                dtype=bn_param_dtype)
            self.var_3.stop_gradient = True
        else:
            self.filter_3 = None
            self.scale_3 = None
            self.bias_3 = None
            self.mean_3 = None
            self.var_3 = None

    def forward(self, x):
        out = resnet_basic_block(
            x,
            self.filter_1,
            self.scale_1,
            self.bias_1,
            self.mean_1,
            self.var_1,
            self.filter_2,
            self.scale_2,
            self.bias_2,
            self.mean_2,
            self.var_2,
            self.filter_3,
            self.scale_3,
            self.bias_3,
            self.mean_3,
            self.var_3,
            self._stride1,
            self._stride2,
            self._stride3,
            self._padding1,
            self._padding2,
            self._padding3,
            self._dilation1,
            self._dilation2,
            self._dilation3,
            self._groups,
            self._momentum,
            self._eps,
            self._data_format,
            self._has_shortcut,
            use_global_stats=self._use_global_stats,
            training=self.training,
            trainable_statistics=self._trainable_statistics,
            find_conv_max=self._find_conv_max)
        return out
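

# A minimal sketch of the has_shortcut=False configuration, complementing the
# docstring example above (which uses has_shortcut=True). An XPU build of
# Paddle is assumed. In this configuration no conv3/bn3 parameters are created
# (filter_3 etc. stay None), and with stride 1 and "same" padding the block
# preserves the input shape:
#
#     import paddle
#     from paddle.incubate.xpu.resnet_block import ResNetBasicBlock
#
#     ch = 8
#     x = paddle.uniform((2, ch, 16, 16), dtype='float32')
#     block = ResNetBasicBlock(num_channels1=ch, num_filter1=ch, filter1_size=3,
#                              num_channels2=ch, num_filter2=ch, filter2_size=3,
#                              num_channels3=ch, num_filter3=ch, filter3_size=1,
#                              stride1=1, stride2=1, stride3=1, act='relu',
#                              padding1=1, padding2=1, padding3=0,
#                              has_shortcut=False)
#     out = block(x)
#     print(out.shape)  # [2, 8, 16, 16]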