# dnn.oprdecl
# Declares 'Convolution' with pyname 'convolution_v0', using the
# 'ConvolutionV0' param.
# NOTE(review): 'execution_polity' looks like a misspelling of
# 'execution_policy' (the spelling used by e.g. ConvBiasForward in this file).
# It is kept verbatim because renaming it could change the generated
# interface -- confirm before fixing.
decl_opr(
    'Convolution',
    pyname='convolution_v0',
    inputs=[
        Doc('src', 'input image in (batch, channel, row, col) format'),
        Doc(
            'filter',
            'convolution kernel in (out channel, in channel, '
            'kern row, kern col) format',
        ),
    ],
    params=[
        ('param', 'ConvolutionV0'),
        ('execution_polity', 'ExecutionPolicy'),
    ],
    desc='batched convolution on channeled 2D images',
)

# Declares 'Convolution' at version=1 with the 'Convolution' param and
# has_out_dtype=True; no explicit pyname is given.
# NOTE(review): 'execution_polity' looks like a misspelling of
# 'execution_policy' (the spelling used by e.g. ConvBiasForward in this file).
# It is kept verbatim because renaming it could change the generated
# interface -- confirm before fixing.
decl_opr(
    'Convolution',
    inputs=[
        Doc('src', 'input image in (batch, channel, row, col) format'),
        Doc(
            'filter',
            'convolution kernel in (out channel, in channel, '
            'kern row, kern col) format',
        ),
    ],
    params=[
        ('param', 'Convolution'),
        ('execution_polity', 'ExecutionPolicy'),
    ],
    desc='batched convolution on channeled 2D images',
    version=1,
    has_out_dtype=True,
)

# Declares 'ConvolutionBackwardData' with pyname 'deconvolution_v0', using the
# 'ConvolutionV0' param.
# The two `body` statements swap the order of the two inputs (src, filter).
# NOTE(review): 'execution_polity' looks like a misspelling of
# 'execution_policy' (the spelling used by e.g. ConvBiasForward in this file).
# It is kept verbatim because renaming it could change the generated
# interface -- confirm before fixing.
decl_opr(
    'ConvolutionBackwardData',
    pyname='deconvolution_v0',
    inputs=[
        Doc('src', 'input image in (batch, channel, row, col) format'),
        Doc(
            'filter',
            'convolution kernel in (out channel, in channel, '
            'kern row, kern col) format',
        ),
    ],
    params=[
        ('param', 'ConvolutionV0'),
        ('execution_polity', 'ExecutionPolicy'),
    ],
    body=['a, b = all_inputs', 'all_inputs = [b, a]'],
    desc=(
        'batched deconvolution on channeled 2D images; '
        'the underlying computation is in fact gradient of convolution '
        'w.r.t. data'
    ),
)

# Declares 'ConvolutionBackwardData' at version=1 with pyname 'deconvolution',
# using the 'Convolution' param.
# The two `body` statements swap the order of the two inputs (src, filter).
# NOTE(review): 'execution_polity' looks like a misspelling of
# 'execution_policy' (the spelling used by e.g. ConvBiasForward in this file).
# It is kept verbatim because renaming it could change the generated
# interface -- confirm before fixing.
decl_opr(
    'ConvolutionBackwardData',
    pyname='deconvolution',
    inputs=[
        Doc('src', 'input image in (batch, channel, row, col) format'),
        Doc(
            'filter',
            'convolution kernel in (out channel, in channel, '
            'kern row, kern col) format',
        ),
    ],
    params=[
        ('param', 'Convolution'),
        ('execution_polity', 'ExecutionPolicy'),
    ],
    body=['a, b = all_inputs', 'all_inputs = [b, a]'],
    desc=(
        'batched deconvolution on channeled 2D images; '
        'the underlying computation is in fact gradient of convolution '
        'w.r.t. data'
    ),
    version=1,
)

# Declares 'MaskConvolution' at version=1: takes src, filter and a 0/1 mask
# input, with the 'Convolution' param.
# Fix: spelling of "corresponding" in the description text.
decl_opr('MaskConvolution',
         inputs=[Doc('src',
                     'input image in (batch, channel, row, col) format'),
                 Doc('filter',
                     'convolution kernel in '
                     '(out channel, in channel, kern row, kern col) format'),
                 Doc('mask',
                     'the 0/1 matrix, each element at (i, j) indicates the '
                     '*output(i, j)* of DefaultConvolution should be zero')],
         params=[('param', 'Convolution')],
         desc=('batched mask conv on channeled 2D images, mask is '
               'corresponding to output'),
         version=1)

# Declares 'MaskPropagate': a single 0/1 mask input with the 'MaskPropagate'
# param.
decl_opr(
    'MaskPropagate',
    inputs=[
        Doc('src', "0/1 matrix for MaskConvolution's input"),
    ],
    params=[
        ('param', 'MaskPropagate'),
    ],
    desc=(
        'calculates the mask for output by given kernel, '
        'stride and padding'
    ),
)

# Declares 'Images2Neibs' with the 'Images2Neibs' param. The description is a
# reST raw string; its backslash sequences are kept exactly as written.
decl_opr(
    'Images2Neibs',
    inputs=[
        Doc('src', 'input image in (batch, channel, row, col) format'),
    ],
    params='Images2Neibs',
    desc=Doc(
        None,
        r"""
    Apply a sliding window to input tensor and copy content in the window to
    corresponding output location. Assume input shape is :math:`(N, C, IH, IW)`,
    then output shape would be :math:`(N, C, OH, OW, window_h, window_w)` where
    :math:`(OH, OW)` would be computed from padding, stride, window and
    :math:`(IH, IW)`, as in convolution. For each output location, we have;

    .. math::

        out_{n, c, oh, ow, wh, ww} &= src_{n, c, ih+wh, iw+ww} \\\\
        \\text{where } & ih=-pad_h+oh \\times stride_h \\\\
                       & iw=-pad_w+ow \\times
        stride_w
""",
    ),
)

# Declares 'Local' with pyname 'local', using the 'ConvolutionV0' param.
# Fix: removed stray line-number residue that sat between the call arguments
# and made the declaration a syntax error.
decl_opr('Local',
         pyname='local',
         inputs=[Doc('src',
                     'input image in (batch, channel, row, col) format'),
                 Doc('filter',
                     'convolution kernel in '
                     '(out row, out col, in channel, '
                     'kern row, kern col, out channel) format')],
         params='ConvolutionV0',
         desc='batched convolution on channeled 2D images, but kernels are '
         'not shared across different output positions')

# Declares 'Local' at version=1 with pyname 'local_v1', using the
# 'Convolution' param.
# Fix: removed the stray line-number residue that preceded the declaration.
decl_opr('Local',
         pyname='local_v1',
         inputs=[Doc('src',
                     'input image in (batch, channel, row, col) format'),
                 Doc('filter',
                     'convolution kernel in '
                     '(out row, out col, in channel, '
                     'kern row, kern col, out channel) format')],
         params='Convolution',
         desc='batched convolution on channeled 2D images, but kernels are '
         'not shared across different output positions',
         version=1)

# Declares 'GroupLocal' at version=1, using the 'Convolution' param.
# Fixes: removed stray line-number residue around and inside the call (it made
# the declaration a syntax error) and corrected "groupped" -> "grouped" in the
# description text.
decl_opr('GroupLocal',
         inputs=[Doc('src',
                     'input image in (batch, channel, row, col) format'),
                 Doc('filter',
                     'convolution kernel in '
                     '(group, out row, out col, in channel / group, '
                     'kern row, kern col, out channel / group) format')],
         params=[('param', 'Convolution')],
         desc='batched convolution on grouped channeled 2D images, but '
         'kernels are not shared across different output positions',
         version=1)

# Declares 'LRN': one undocumented input named 'src' and the 'LRN' param.
decl_opr(
    'LRN',
    inputs=['src'],
    params='LRN',
    desc='local response normalization',
)

# Declares 'Pooling': one undocumented input named 'src' and the 'Pooling'
# param; no description is supplied.
decl_opr(
    'Pooling',
    inputs=['src'],
    params='Pooling',
)

# Declares 'AdaptivePooling': takes the source image and a var holding the
# requested output (height, width), with the 'AdaptivePooling' param.
# Fixes: removed the stray line-number residue preceding the declaration and
# added the missing space between the two concatenated description fragments
# (previously rendered as "Adaptive Pooling.The output ...").
decl_opr('AdaptivePooling',
         inputs=[Doc('src', 'input image, shape (n, c, ih, iw)'),
                 Doc('out_shape',
                     'output image shape, containing two elements '
                     'specifying output height and width.')],
         params='AdaptivePooling',
         desc='Adaptive Pooling. '
         'The output shape is (n, c, oh, ow), where (oh, ow) is given by '
         '*out_shape*.')

# Declares 'ROIPooling' with outputs=[0] and the 'ROIPooling' param; inputs
# are the source image, the ROI table and a var giving the output shape.
# Fix: removed the stray line-number residue that preceded the declaration.
decl_opr('ROIPooling', outputs=[0],
         inputs=[Doc('src', 'input image, shape (n, c, ih, iw)'),
                 Doc('rois', 'regions of interest, shape (m, 5). '
                     'Note that rois[:, 0] denotes the input image index; we '
                     'store it as a float, but it should be an integral value.'
                     ' The rois[:, 1:5] are (x0, y0, x1, y1) for each ROI, '
                     'which would be multiplied by the scale value given in '
                     'param.'),
                 Doc('dest_shape', 'a var to describe output shape, should '
                     'contain exactly two elements')],
         params='ROIPooling',
         desc='ROI pooling, see '
         'https://github.com/rbgirshick/caffe-fast-rcnn. '
         'The output shape is (m, c, oh, ow), where (oh, ow) is given by '
         '*dest_shape*.')


# Declares 'Convolution3D' with the 'Convolution3D' param.
# NOTE(review): 'execution_polity' looks like a misspelling of
# 'execution_policy' (the spelling used by Convolution3DBackwardData in this
# file). It is kept verbatim because renaming it could change the generated
# interface -- confirm before fixing.
decl_opr(
    'Convolution3D',
    inputs=[
        Doc(
            'src',
            'input image in (batch, channel, depth, row, col) format',
        ),
        Doc(
            'filter',
            'convolution kernel in (out channel, in channel, '
            'kern depth, kern row, kern col) format',
        ),
    ],
    params=[
        ('param', 'Convolution3D'),
        ('execution_polity', 'ExecutionPolicy'),
    ],
    desc='batched convolution on channeled 3D images',
)


# Declares 'Convolution3DBackwardData' with pyname 'deconvolution3d', using
# the 'Convolution3D' param.
# The two `body` statements swap the order of the two inputs (src, filter).
decl_opr(
    'Convolution3DBackwardData',
    pyname='deconvolution3d',
    inputs=[
        Doc(
            'src',
            'input image in (batch, channel, depth, row, col) format',
        ),
        Doc(
            'filter',
            'convolution kernel in (out channel, in channel, '
            'kern depth, kern row, kern col) format',
        ),
    ],
    params=[
        ('param', 'Convolution3D'),
        ('execution_policy', 'ExecutionPolicy'),
    ],
    body=['a, b = all_inputs', 'all_inputs = [b, a]'],
    desc=(
        'batched deconvolution on channeled 3D images; '
        'the underlying computation is in fact gradient of convolution '
        'w.r.t. data'
    ),
)

# Declares 'ConvBiasForward' with pyname 'conv_bias_activation_v1', using the
# 'ConvBiasV1' param and has_out_dtype=True.
decl_opr(
    'ConvBiasForward',
    pyname='conv_bias_activation_v1',
    inputs=[
        Doc('src', 'input image, allow NCHW, NHWC, NCHW4'),
        Doc('filter', 'filter'),
        Doc('bias', 'bias'),
    ],
    params=[
        ('param', 'ConvBiasV1'),
        ('execution_policy', 'ExecutionPolicy'),
    ],
    desc='activation(convolution(src, filter) + bias) with specified dtype',
    has_out_dtype=True,
)

# Declares 'ConvBiasForward' at version=3 with pyname 'conv_bias_activation',
# using the 'ConvBias' param and has_out_dtype=True.
decl_opr(
    'ConvBiasForward',
    pyname='conv_bias_activation',
    inputs=[
        Doc('src', 'input image, allow NCHW, NHWC, NCHW4'),
        Doc('filter', 'filter'),
        Doc('bias', 'bias'),
    ],
    params=[
        ('param', 'ConvBias'),
        ('execution_policy', 'ExecutionPolicy'),
    ],
    desc='activation(convolution(src, filter) + bias) with specified dtype',
    version=3,
    has_out_dtype=True,
)

# Declares 'BatchNorm' with pyname 'batch_norm': five inputs, including the
# running statistics, and the 'BN' param.
decl_opr(
    'BatchNorm',
    pyname='batch_norm',
    inputs=[
        'x',
        'scale',
        'bias',
        'running_mean',
        'running_variance',
    ],
    desc=(
        'batch normalization similar to cudnn, '
        'all params have the same definition with '
        'cudnnBatchNormalization. It has five outputs: '
        'running_mean, running_variance, save_mean, save_inv_variance, y.'
    ),
    params='BN',
)

# Declares 'BatchNorm' with pyname 'batch_norm_no_statistic': three inputs
# (no running statistics) and the 'BN' param.
# Fix: spelling of "normalization" in the description text.
decl_opr('BatchNorm',
         pyname='batch_norm_no_statistic',
         inputs=['x', 'scale', 'bias'],
         desc=('batch normalization and no need to update mean and variance. '
               'It has three outputs: save_mean, save_inv_variance, y.'),
         params='BN')

# Declares 'LocalShareForward' with pyname 'local_share', using the
# 'LocalShare' param and has_out_dtype=True. The description is a reST raw
# string; its backslash sequences are kept exactly as written.
decl_opr(
    'LocalShareForward',
    pyname='local_share',
    inputs=[
        Doc('src', 'input image in (batch, channel, row, col) format'),
        Doc(
            'filter',
            'local share weights in (spatial_groups_h, spatial_groups_w, '
            'in channel, kern row, kern col, out channel) format',
        ),
    ],
    params=[
        ('param', 'LocalShare'),
        ('execution_policy', 'ExecutionPolicy'),
    ],
    desc=Doc(
        None,
        r"""
    Apply a spatial group convolution of input tensor and filter tensor. The output tensor will be split into spatial_groups_hxspatial_groups_w groups. Output locations in the same spatial group share same weights. And weights corresponding to different spatial groups are different.
    Assume input shape is :math:`(N, IC, IH, IW)` and spatial groups in horizontal and vertical directions are :math:`(spatial_groups_h, spatial_groups_w)`,
    then filter shape would be :math:`(spatial_groups_h, spatial_groups_w, IC, FH, FW, OC)` and output shape would be :math:`(N, OC, OH, OW)` where :math:`(OH, OW)` would be computed from padding, stride, :math:`(FH, FW)` and :math:`(IH, IW)`, as in convolution.
    for each output location, we have;

    .. math::

        out_{n, oc, oh, ow} &= \sum_{ic=0}^{IC}\sum_{kh=0}^{FH}\sum_{kw=0}^{FW}src_{n, ic, ih+kh, iw+kw}
                            * filter_{grp_h, grp_w, ic, kh, kw, oc} \\\\
        \\text{where} & ih=-pad_h+oh \\times stride_h \\\\
                      & iw=-pad_w+ow \\times stride_w \\\\
                      & grp_h = oh / (OH / spatial_groups_h) \\\\
                      & grp_w = ow / (OW / spatial_groups_w)
""",
    ),
    has_out_dtype=True,
)

# Declares 'ROIAlign' with outputs=[0] and the 'ROIAlign' param; inputs are
# the source image and the ROI table.
# Fix: removed stray line-number residue that sat between the call arguments
# and made the declaration a syntax error.
decl_opr('ROIAlign', outputs=[0],
         inputs=[Doc('src', 'input image, shape (n, c, ih, iw)'),
                 Doc('rois', 'regions of interest, shape (m, 5). '
                     'Note that rois[:, 0] denotes the input image index; we '
                     'store it as a float, but it should be an integral value.'
                     ' The rois[:, 1:5] are (x0, y0, x1, y1) for each ROI, '
                     'which would be multiplied by the scale value given in '
                     'param.')],
         params='ROIAlign',
         desc='ROI Align, see '
         'Mask-RCNN: https://arxiv.org/pdf/1703.06870.pdf, '
         'The output shape is (m, c, pooled_height, pooled_width), where (pooled_height, pooled_width) is given by '
         '*Param*.')

# Declares 'DeformableConvForward' with pyname 'deformable_conv': four inputs
# (im, filter, offset, mask), the 'Convolution' param and has_out_dtype=True.
decl_opr(
    'DeformableConvForward',
    pyname='deformable_conv',
    inputs=[
        Doc('im', 'input feature map in (batch, channel, row, col) format'),
        Doc(
            'filter',
            'weights in (output channel, input channel, filter row, filter col) '
            'or (group, output channel per group, input channel per group, '
            'filter row, filter col) format',
        ),
        Doc(
            'offset',
            'deformable offset in (batch, '
            'deformable group * filter row * filter col * 2, '
            'output row, output col) format',
        ),
        Doc(
            'mask',
            'deformable mask in (batch, '
            'deformable group * filter row * filter col, '
            'output row, output col) format',
        ),
    ],
    params=[
        ('param', 'Convolution'),
        ('execution_policy', 'ExecutionPolicy'),
    ],
    desc=Doc(
        None,
        r""" Apply a deformable convolution to input tensor and filter tensor. The offset tensor will adjust the position of each grid of a convolution filter. The mask tensor will be applied to the deformed input tensor. """,
    ),
    has_out_dtype=True,
)

# Declares 'DeformablePSROIPoolingForward' with pyname
# 'deformable_psroi_pooling': three inputs (data, rois, trans), the
# 'DeformablePSROIPooling' param and has_out_dtype=True.
decl_opr(
    'DeformablePSROIPoolingForward',
    pyname='deformable_psroi_pooling',
    inputs=[
        Doc('data', 'input feature map in (batch, channel, row, col) format'),
        Doc('rois', 'region of interest in (bbox count, 5) format'),
        Doc(
            'trans',
            'bbox position transform parameter in '
            '(bbox count, 2, pooled_h, pooled_w) format',
        ),
    ],
    params=[
        ('param', 'DeformablePSROIPooling'),
    ],
    desc=Doc(None, r""" PSROIPooling with a bbox deformation. """),
    has_out_dtype=True,
)

# Declares 'BatchConvBiasForward' with pyname 'batch_conv_bias_activation',
# using the 'BatchConvBias' param and has_out_dtype=True.
# Fixes: removed line-number residue that had been embedded inside the raw
# description string, and added the missing space between the two concatenated
# fragments of the 'filter' doc (previously "dimension(batch, ...").
# The backslash sequences in the raw string are kept exactly as written.
decl_opr('BatchConvBiasForward',
         pyname='batch_conv_bias_activation',
         inputs=[Doc('src', 'input image in (batch, channel//4, row, col, 4) format'),
                 Doc('filter', 'weights unshared in batch dimension '
                     '(batch, out_channel, in_channel//4, kern row, kern col, 4) format'),
                 Doc('bias', 'bias'),
         ],
         params=[('param', 'BatchConvBias'),
                 ('execution_policy', 'ExecutionPolicy')],
         desc=Doc(None,
r"""
    Apply a convolution of input tensor and filter tensor whose weights are not shared in batch dimensions. Outputs with batch index use the same weight.
    Assume input shape is :math:`(N, IC, IH, IW)` and filter shape is :math:`(batch, OC, IC, FH, FW)`, the output shape will be :math:`(N, OC, OH, OW)` where :math:`(OH, OW)` would be computed from padding, stride, :math:`(FH, FW)` and :math:`(IH, IW)`, as in convolution.
    for each output location, we have;

    .. math::

        out_{n, oc, oh, ow} &= \sum_{ic=0}^{IC}\sum_{kh=0}^{FH}\sum_{kw=0}^{FW}src_{n, ic, ih+kh, iw+kw}
                            * filter_{n, oc, ic, kh, kw} \\\\
        \\text{where} & ih=-pad_h+oh \\times stride_h \\\\
                      & iw=-pad_w+ow \\times stride_w
"""),
         has_out_dtype=True)


# vim: ft=python