From cd02d7c85e554dedb72591d9639e28e08edb51c4 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Fri, 15 Jan 2021 14:22:56 +0800 Subject: [PATCH] feat(mge/opr): support deformable conv2d/psroi_pooling GitOrigin-RevId: 501cadda76b84929af51db134f4b74dd73e1c2a3 --- imperative/python/megengine/functional/nn.py | 116 +++++++++++++++- .../python/megengine/module/__init__.py | 10 +- imperative/python/megengine/module/conv.py | 127 +++++++++++++++++- .../module/deformable_psroi_pooling.py | 46 +++++++ .../test/unit/functional/test_functional.py | 27 ++++ imperative/src/impl/ops/deformable_conv2d.cpp | 38 ++++++ .../src/impl/ops/deformable_psroi_pooling.cpp | 32 +++++ imperative/src/impl/ops/specializations.cpp | 2 +- src/core/include/megbrain/ir/ops.td | 4 + 9 files changed, 391 insertions(+), 11 deletions(-) create mode 100644 imperative/python/megengine/module/deformable_psroi_pooling.py create mode 100644 imperative/src/impl/ops/deformable_conv2d.cpp create mode 100644 imperative/src/impl/ops/deformable_psroi_pooling.cpp diff --git a/imperative/python/megengine/functional/nn.py b/imperative/python/megengine/functional/nn.py index c629bc70d..8a06c3ad9 100644 --- a/imperative/python/megengine/functional/nn.py +++ b/imperative/python/megengine/functional/nn.py @@ -44,6 +44,8 @@ __all__ = [ "batch_norm", "conv2d", "conv_transpose2d", + "deformable_conv2d", + "deformable_psroi_pooling", "dot", "dropout", "indexing_one_hot", @@ -119,7 +121,8 @@ def conv2d( :param padding: size of the paddings added to the input on both sides of its spatial dimensions. Only zero-padding is supported. Default: 0 :param dilation: dilation of the 2D convolution operation. Default: 1 - :param groups: number of groups into which the input and output channels are divided, so as to perform a ``grouped convolution``. When ``groups`` is not 1, + :param groups: number of groups into which the input and output channels are divided, + so as to perform a ``grouped convolution``. 
When ``groups`` is not 1, ``in_channels`` and ``out_channels`` must be divisible by ``groups``, and the shape of weight should be `(groups, out_channel // groups, in_channels // groups, height, width)`. @@ -141,7 +144,6 @@ def conv2d( pad_h, pad_w = expand_hw(padding) dilate_h, dilate_w = expand_hw(dilation) - Sparse = builtin.Convolution.Sparse sparse_type = "DENSE" if groups == 1 else "GROUP" op = builtin.Convolution( stride_h=stride_h, @@ -185,7 +187,8 @@ def conv_transpose2d( :param padding: size of the paddings added to the input on both sides of its spatial dimensions. Only zero-padding is supported. Default: 0 :param dilation: dilation of the 2D convolution operation. Default: 1 - :param groups: number of groups into which the input and output channels are divided, so as to perform a ``grouped convolution``. When ``groups`` is not 1, + :param groups: number of groups into which the input and output channels are divided, + so as to perform a ``grouped convolution``. When ``groups`` is not 1, ``in_channels`` and ``out_channels`` must be divisible by groups, and the shape of weight should be `(groups, out_channel // groups, in_channels // groups, height, width)`. Default: 1 @@ -226,6 +229,74 @@ def conv_transpose2d( return output +def deformable_conv2d( + inp: Tensor, + weight: Tensor, + offset: Tensor, + mask: Tensor, + bias: Optional[Tensor] = None, + stride: Union[int, Tuple[int, int]] = 1, + padding: Union[int, Tuple[int, int]] = 0, + dilation: Union[int, Tuple[int, int]] = 1, + groups: int = 1, + conv_mode="CROSS_CORRELATION", + compute_mode="DEFAULT", +) -> Tensor: + """ + Deformable Convolution. + + :param inp: input feature map. + :param weight: convolution kernel. + :param offset: input offset to kernel, channel of this tensor should match the deformable settings. + :param mask: input mask to kernel, channel of this tensor should match the deformable settings. + :param bias: bias added to the result of convolution (if given). 
+ :param stride: stride of the 2D convolution operation. Default: 1 + :param padding: size of the paddings added to the input on both sides of its + spatial dimensions. Only zero-padding is supported. Default: 0 + :param dilation: dilation of the 2D convolution operation. Default: 1 + :param groups: number of groups into which the input and output channels are divided, + so as to perform a ``grouped convolution``. When ``groups`` is not 1, + ``in_channels`` and ``out_channels`` must be divisible by groups, + and the shape of weight should be `(groups, out_channel // groups, + in_channels // groups, height, width)`. Default: 1 + :type conv_mode: string or :class:`Convolution.Mode` + :param conv_mode: supports "CROSS_CORRELATION". Default: + "CROSS_CORRELATION" + :type compute_mode: string or + :class:`Convolution.ComputeMode` + :param compute_mode: when set to "DEFAULT", no special requirements will be + placed on the precision of intermediate results. When set to "FLOAT32", + "Float32" would be used for accumulator and intermediate result, but only + effective when input and output are of Float16 dtype. + :return: output tensor. 
+ """ + assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION" + assert compute_mode == "DEFAULT" or compute_mode.name == "DEFAULT" + + stride_h, stride_w = expand_hw(stride) + pad_h, pad_w = expand_hw(padding) + dilate_h, dilate_w = expand_hw(dilation) + + sparse_type = "DENSE" if groups == 1 else "GROUP" + op = builtin.DeformableConv( + stride_h=stride_h, + stride_w=stride_w, + pad_h=pad_h, + pad_w=pad_w, + dilate_h=dilate_h, + dilate_w=dilate_w, + strategy=get_conv_execution_strategy(), + mode=conv_mode, + compute_mode=compute_mode, + sparse=sparse_type, + ) + inp, weight, offset, mask = utils.convert_inputs(inp, weight, offset, mask) + (output,) = apply(op, inp, weight, offset, mask) + if bias is not None: + output += bias + return output + + def local_conv2d( inp: Tensor, weight: Tensor, @@ -380,6 +451,45 @@ def adaptive_avg_pool2d( return output +def deformable_psroi_pooling( + inp: Tensor, + rois: Tensor, + trans: Tensor, + no_trans: bool, + part_size: int, + pooled_h: int, + pooled_w: int, + sample_per_part: int, + spatial_scale: float, + trans_std: float = 0.1, +): + """ + Deformable PSROI(Position Sensitive Region of Interest) Pooling. + + :param inp: input feature map. + :param rois: the rois for feature pooling. + :param trans: input offset to psroi_pooling. + :param no_trans: check the phase of DeformablePSROIPooling. False to the + 1st phase, True to the 2nd phase. + :param part_size: part size. + :param sample_per_part: sample points of each part. + :param pooled_h, pooled_w: height and width of the pooling output. + :param spatial_scale: the spatial_scale w.r.t input image. + :param trans_std: multiplier used in 2nd phase. 
+ """ + op = builtin.DeformablePSROIPooling( + no_trans=no_trans, + part_size=part_size, + pooled_h=pooled_h, + pooled_w=pooled_w, + sample_per_part=sample_per_part, + spatial_scale=spatial_scale, + trans_std=trans_std, + ) + output, _ = apply(op, inp, rois, trans) + return output + + def prelu(inp: Tensor, weight: Tensor) -> Tensor: r""" Applies the element-wise PReLU function. diff --git a/imperative/python/megengine/module/__init__.py b/imperative/python/megengine/module/__init__.py index d91095d54..bc7ad560e 100644 --- a/imperative/python/megengine/module/__init__.py +++ b/imperative/python/megengine/module/__init__.py @@ -12,8 +12,16 @@ from .adaptive_pooling import AdaptiveAvgPool2d, AdaptiveMaxPool2d from .batch_matmul_activation import BatchMatMulActivation from .batchnorm import BatchNorm1d, BatchNorm2d, SyncBatchNorm from .concat import Concat -from .conv import Conv1d, Conv2d, ConvRelu2d, ConvTranspose2d, LocalConv2d +from .conv import ( + Conv1d, + Conv2d, + ConvRelu2d, + ConvTranspose2d, + DeformableConv2d, + LocalConv2d, +) from .conv_bn import ConvBn2d, ConvBnRelu2d +from .deformable_psroi_pooling import DeformablePSROIPooling from .dropout import Dropout from .elemwise import Elemwise from .embedding import Embedding diff --git a/imperative/python/megengine/module/conv.py b/imperative/python/megengine/module/conv.py index 247463651..77b7a2bb8 100644 --- a/imperative/python/megengine/module/conv.py +++ b/imperative/python/megengine/module/conv.py @@ -10,7 +10,14 @@ from typing import Tuple, Union import numpy as np -from ..functional import conv1d, conv2d, conv_transpose2d, local_conv2d, relu +from ..functional import ( + conv1d, + conv2d, + conv_transpose2d, + deformable_conv2d, + local_conv2d, + relu, +) from ..tensor import Parameter from ..utils.tuple_function import _pair, _pair_nonzero from . import init @@ -121,7 +128,8 @@ class Conv1d(_ConvNd): :param padding: size of the paddings added to the input on both sides of its spatial dimensions. 
Only zero-padding is supported. Default: 0 :param dilation: dilation of the 1D convolution operation. Default: 1 - :param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". When ``groups`` is not 1, + :param groups: number of groups into which the input and output channels are divided, + so as to perform a "grouped convolution". When ``groups`` is not 1, ``in_channels`` and ``out_channels`` must be divisible by ``groups``, and there would be an extra dimension at the beginning of the weight's shape. Specifically, the shape of weight would be `(groups, @@ -250,15 +258,16 @@ class Conv2d(_ConvNd): In general, output feature maps' shapes can be inferred as follows: input: :math:`(N, C_{\text{in}}, H_{\text{in}}, W_{\text{in}})` + output: :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})` where .. math:: \text{H}_{out} = \lfloor \frac{\text{H}_{in} + 2 * \text{padding[0]} - - \text{dilation[0]} * (\text{kernel_size[0]} - 1)}{\text{stride[0]}} + 1 \rfloor + \text{dilation[0]} * (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1 \rfloor .. math:: \text{W}_{out} = \lfloor \frac{\text{W}_{in} + 2 * \text{padding[1]} - - \text{dilation[1]} * (\text{kernel_size[1]} - 1)}{\text{stride[1]}} + 1 \rfloor + \text{dilation[1]} * (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1 \rfloor When `groups == in_channels` and `out_channels == K * in_channels`, where K is a positive integer, this operation is also known as depthwise @@ -277,7 +286,8 @@ class Conv2d(_ConvNd): :param padding: size of the paddings added to the input on both sides of its spatial dimensions. Only zero-padding is supported. Default: 0 :param dilation: dilation of the 2D convolution operation. Default: 1 - :param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". 
When ``groups`` is not 1, + :param groups: number of groups into which the input and output channels are divided, + so as to perform a "grouped convolution". When ``groups`` is not 1, ``in_channels`` and ``out_channels`` must be divisible by ``groups``, and there would be an extra dimension at the beginning of the weight's shape. Specifically, the shape of weight would be `(groups, @@ -406,7 +416,8 @@ class ConvTranspose2d(_ConvNd): :param padding: size of the paddings added to the input on both sides of its spatial dimensions. Only zero-padding is supported. Default: 0 :param dilation: dilation of the 2D convolution operation. Default: 1 - :param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". When ``groups`` is not 1, + :param groups: number of groups into which the input and output channels are divided, + so as to perform a "grouped convolution". When ``groups`` is not 1, ``in_channels`` and ``out_channels`` must be divisible by ``groups``, and there would be an extra dimension at the beginning of the weight's shape. Specifically, the shape of weight would be ``(groups, @@ -579,3 +590,107 @@ class ConvRelu2d(Conv2d): def forward(self, inp): return relu(self.calc_conv(inp, self.weight, self.bias)) + + +class DeformableConv2d(_ConvNd): + """ + Deformable Convolution. + + :param in_channels: number of input channels. + :param out_channels: number of output channels. + :param kernel_size: size of weight on spatial dimensions. If kernel_size is + an :class:`int`, the actual kernel size would be + `(kernel_size, kernel_size)`. Default: 1 + :param stride: stride of the 2D convolution operation. Default: 1 + :param padding: size of the paddings added to the input on both sides of its + spatial dimensions. Only zero-padding is supported. Default: 0 + :param dilation: dilation of the 2D convolution operation. 
Default: 1 + :param groups: number of groups into which the input and output channels are divided, + so as to perform a "grouped convolution". When ``groups`` is not 1, + ``in_channels`` and ``out_channels`` must be divisible by ``groups``, + and there would be an extra dimension at the beginning of the weight's + shape. Specifically, the shape of weight would be `(groups, + out_channel // groups, in_channels // groups, *kernel_size)`. + :param bias: whether to add a bias onto the result of convolution. Default: + True + :param conv_mode: Supports `CROSS_CORRELATION`. Default: + `CROSS_CORRELATION` + :param compute_mode: When set to "DEFAULT", no special requirements will be + placed on the precision of intermediate results. When set to "FLOAT32", + "Float32" would be used for accumulator and intermediate result, but only + effective when input and output are of float16 dtype. + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, int]], + stride: Union[int, Tuple[int, int]] = 1, + padding: Union[int, Tuple[int, int]] = 0, + dilation: Union[int, Tuple[int, int]] = 1, + groups: int = 1, + bias: bool = True, + conv_mode: str = "CROSS_CORRELATION", + compute_mode: str = "DEFAULT", + ): + kernel_size = _pair_nonzero(kernel_size) + stride = _pair_nonzero(stride) + padding = _pair(padding) + dilation = _pair_nonzero(dilation) + self.conv_mode = conv_mode + self.compute_mode = compute_mode + super().__init__( + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation, + groups, + bias, + ) + + def _get_fanin(self): + kh, kw = self.kernel_size + ic = self.in_channels + return kh * kw * ic + + def _infer_weight_shape(self): + group = self.groups + ichl = self.in_channels + ochl = self.out_channels + kh, kw = self.kernel_size + if group == 1: + # Assume format is NCHW + return (ochl, ichl, kh, kw) + + assert ( + ichl % group == 0 and ochl % group == 0 + ), "invalid config: input_channels={} 
output_channels={} group={}".format( + ichl, ochl, group + ) + # Assume format is NCHW + return (group, ochl // group, ichl // group, kh, kw) + + def _infer_bias_shape(self): + # Assume format is NCHW + return (1, self.out_channels, 1, 1) + + def calc_conv(self, inp, weight, offset, mask, bias): + return deformable_conv2d( + inp, + weight, + offset, + mask, + bias, + self.stride, + self.padding, + self.dilation, + self.groups, + self.conv_mode, + self.compute_mode, + ) + + def forward(self, inp, offset, mask): + return self.calc_conv(inp, self.weight, offset, mask, self.bias) diff --git a/imperative/python/megengine/module/deformable_psroi_pooling.py b/imperative/python/megengine/module/deformable_psroi_pooling.py new file mode 100644 index 000000000..2791eddee --- /dev/null +++ b/imperative/python/megengine/module/deformable_psroi_pooling.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ +from ..functional import deformable_psroi_pooling +from .module import Module + + +class DeformablePSROIPooling(Module): + def __init__( + self, + no_trans, + part_size, + pooled_h, + pooled_w, + sample_per_part, + spatial_scale, + trans_std: float = 0.1, + ): + super().__init__() + self.no_trans = no_trans + self.part_size = part_size + self.pooled_h = pooled_h + self.pooled_w = pooled_w + self.sample_per_part = sample_per_part + self.spatial_scale = spatial_scale + self.trans_std = trans_std + + def forward(self, inp, rois, trans): + return deformable_psroi_pooling( + inp, + rois, + trans, + self.no_trans, + self.part_size, + self.pooled_h, + self.pooled_w, + self.sample_per_part, + self.spatial_scale, + self.trans_std, + ) diff --git a/imperative/python/test/unit/functional/test_functional.py b/imperative/python/test/unit/functional/test_functional.py index d6df37361..dc43cd457 100644 --- a/imperative/python/test/unit/functional/test_functional.py +++ b/imperative/python/test/unit/functional/test_functional.py @@ -703,6 +703,33 @@ def test_argmxx_on_inf(): assert all(run_argmin() >= 0) +def test_deformable_psroi_pooling(): + inp = np.random.random((1, 256, 64, 64)).astype("float32") + rois = np.random.random((1, 5)).astype("float32") + trans = np.random.random((24, 2, 7, 7)).astype("float32") + + pooled_h = 7 + pooled_w = 7 + sample_per_part = 4 + no_trans = False + part_size = 7 + spatial_scale = 1.0 / 64 + trans_std = 0.1 + + y = F.deformable_psroi_pooling( + tensor(inp), + tensor(rois), + tensor(trans), + no_trans, + part_size, + pooled_h, + pooled_w, + sample_per_part, + spatial_scale, + trans_std, + ) + + def test_cvt_color(): def rgb2gray(rgb): return np.dot(rgb[..., :3], [0.299, 0.587, 0.114]) diff --git a/imperative/src/impl/ops/deformable_conv2d.cpp b/imperative/src/impl/ops/deformable_conv2d.cpp new file mode 100644 index 000000000..1bbb72556 --- /dev/null +++ b/imperative/src/impl/ops/deformable_conv2d.cpp @@ -0,0 +1,38 @@ +/** + * \file 
imperative/src/impl/ops/deformable_conv2d.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ +#include "megbrain/imperative/ops/autogen.h" +#include "megbrain/opr/dnn/convolution.h" + +#include "../op_trait.h" + +namespace mgb::imperative { + +namespace { namespace deformableconv { +std::shared_ptr make_from_op_node(cg::OperatorNodeBase* node_) { + auto* node = &node_->cast_final_safe(); + return DeformableConv::make(node->param(), node->execution_policy()); +} + +auto apply_on_var_node( + const OpDef& def, + const VarNodeArray& inputs) { + auto&& dcn = static_cast(def); + mgb_assert(inputs.size() == 4); + return opr::DeformableConv::make(inputs[0], inputs[1], inputs[2], inputs[3], dcn.param(), dcn.policy()); +} + +OP_TRAIT_REG(DeformableConv, DeformableConv, opr::DeformableConv) + .make_from_op_node(make_from_op_node) + .apply_on_var_node(apply_on_var_node) + .fallback(); +}} // deformableconv + +} // namespace mgb::imperative diff --git a/imperative/src/impl/ops/deformable_psroi_pooling.cpp b/imperative/src/impl/ops/deformable_psroi_pooling.cpp new file mode 100644 index 000000000..8e946ff64 --- /dev/null +++ b/imperative/src/impl/ops/deformable_psroi_pooling.cpp @@ -0,0 +1,32 @@ +/** + * \file imperative/src/impl/ops/deformable_psroi_pooling.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ +#include "megbrain/imperative/ops/autogen.h" +#include "megbrain/opr/dnn/roi_pooling.h" + +#include "../op_trait.h" + +namespace mgb::imperative { + +namespace { namespace deformable_psroi_pooling { +auto apply_on_var_node( + const OpDef& def, + const VarNodeArray& inputs) { + mgb_assert(inputs.size() == 3); + auto&& op = static_cast(def); + return opr::DeformablePSROIPooling::make_all(inputs[0], inputs[1], inputs[2], op.param()); +} + +OP_TRAIT_REG(DeformablePSROIPooling, DeformablePSROIPooling) + .apply_on_var_node(apply_on_var_node) + .fallback(); +}} // deformable_psroi_pooling + +} // namespace mgb::imperative diff --git a/imperative/src/impl/ops/specializations.cpp b/imperative/src/impl/ops/specializations.cpp index e7f9088b9..54c084cb7 100644 --- a/imperative/src/impl/ops/specializations.cpp +++ b/imperative/src/impl/ops/specializations.cpp @@ -1,5 +1,5 @@ /** - * \file imperative/src/impl/ops/autogen.cpp + * \file imperative/src/impl/ops/specializations.cpp * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") * * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
diff --git a/src/core/include/megbrain/ir/ops.td b/src/core/include/megbrain/ir/ops.td index dd0001b79..fced2ca44 100644 --- a/src/core/include/megbrain/ir/ops.td +++ b/src/core/include/megbrain/ir/ops.td @@ -48,6 +48,8 @@ def Convolution : MgbHashableOp<"Convolution", [ConvolutionParam, ExecutionPolic def ConvolutionBackwardData: MgbHashableOp<"ConvolutionBackwardData", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>; +def DeformableConv : MgbHashableOp<"DeformableConv", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>; + def GroupLocal: MgbHashableOp<"GroupLocal", [ConvolutionParam]>; def Pooling: MgbHashableOp<"Pooling", [PoolingParam]>; @@ -56,6 +58,8 @@ def AdaptivePooling : MgbHashableOp<"AdaptivePooling", [AdaptivePoolingParam]>; def ROIPooling: MgbHashableOp<"ROIPooling", [ROIPoolingParam]>; +def DeformablePSROIPooling : MgbHashableOp<"DeformablePSROIPooling", [DeformablePSROIPoolingParam]>; + def ConvBias : MgbHashableOp<"ConvBias", [ConvBiasParam, ExecutionPolicyParamBase<"policy">]> { let extraArguments = (ins MgbDTypeAttr:$dtype -- GitLab