diff --git a/python_module/megengine/functional/__init__.py b/python_module/megengine/functional/__init__.py
index e322d99876f489ab306142e8a8e05fa92d4f0444..756dfc986db493bd49c3c08b855e223b4d80667a 100644
--- a/python_module/megengine/functional/__init__.py
+++ b/python_module/megengine/functional/__init__.py
@@ -67,6 +67,7 @@ from .nn import (
     interpolate,
     leaky_relu,
     linear,
+    local_conv2d,
     matrix_mul,
     max_pool2d,
     one_hot,
diff --git a/python_module/megengine/functional/nn.py b/python_module/megengine/functional/nn.py
index 2e67647b3fd0b9134963bb42a4a63c36658bfbb2..51852253e67be433aece8bee11bc1fcd385552f6 100644
--- a/python_module/megengine/functional/nn.py
+++ b/python_module/megengine/functional/nn.py
@@ -171,6 +171,53 @@ def conv_transpose2d(
     return res
 
 
+@wrap_io_tensor
+def local_conv2d(
+    inp: Tensor,
+    weight: Tensor,
+    stride: Union[int, Tuple[int, int]] = 1,
+    padding: Union[int, Tuple[int, int]] = 0,
+    dilation: Union[int, Tuple[int, int]] = 1,
+    conv_mode="CROSS_CORRELATION",
+) -> Tensor:
+    """Applies spatial 2D convolution over an image with untied kernels.
+
+    Refer to :class:`~.LocalConv2d` for more information.
+
+    :param inp: the input tensor.
+    :param weight: the untied kernels, laid out as ``(groups, output_height,
+        output_width, in_channels // groups, *kernel_size,
+        out_channels // groups)``.
+    :param stride: stride of the convolution; a single value is used for
+        both spatial dimensions. Default: 1
+    :param padding: padding added on both sides of each spatial dimension;
+        a single value is used for both spatial dimensions. Default: 0
+    :param dilation: dilation of the convolution; a single value is used
+        for both spatial dimensions. Default: 1
+    :param conv_mode: convolution mode passed to the operator. Default:
+        `CROSS_CORRELATION`
+    """
+    # The signature (and its defaults) allow scalars, so expand them to
+    # (h, w) pairs first; indexing a plain int would raise TypeError.
+    (pad_h, pad_w), (stride_h, stride_w), (dilate_h, dilate_w) = (
+        v if isinstance(v, (tuple, list)) else (v, v)
+        for v in (padding, stride, dilation)
+    )
+    ret = mgb.opr.group_local(
+        inp,
+        weight,
+        pad_h=pad_h,
+        pad_w=pad_w,
+        stride_h=stride_h,
+        stride_w=stride_w,
+        dilate_h=dilate_h,
+        dilate_w=dilate_w,
+        format="NCHW",
+        mode=conv_mode,
+    )
+    return ret
+
+
 @wrap_io_tensor
 def max_pool2d(
     inp: Tensor,
diff --git a/python_module/megengine/module/__init__.py b/python_module/megengine/module/__init__.py
index f941d2fa1a35d60c61ad8e775905a75f796e124c..ae6a11940210c24f0cf80f9c1917d3cd46a8f885 100644
--- a/python_module/megengine/module/__init__.py
+++ b/python_module/megengine/module/__init__.py
@@ -9,7 +9,7 @@
 from .activation import LeakyReLU, PReLU, ReLU, Sigmoid, Softmax
 from .batchnorm import BatchNorm1d, BatchNorm2d
 from .concat import Concat
-from .conv import Conv2d, ConvTranspose2d
+from .conv import Conv2d, ConvTranspose2d, LocalConv2d
 from .conv_bn_relu import ConvBn2d, ConvBnRelu2d
 from .dropout import Dropout
 from .elemwise import Elemwise
diff --git a/python_module/megengine/module/conv.py b/python_module/megengine/module/conv.py
index 26587ad280e8c23544e0af2a1b30775d8f0918f7..96748f768b6fd52216df61d9688d480fb656db17 100644
--- a/python_module/megengine/module/conv.py
+++ b/python_module/megengine/module/conv.py
@@ -14,7 +14,7 @@ import numpy as np
 import megengine._internal as mgb
 
 from ..core import Parameter
-from ..functional import conv2d, conv_transpose2d
+from ..functional import conv2d, conv_transpose2d, local_conv2d
 from ..utils.types import _pair, _pair_nonzero
 from . import init
 from .module import Module
@@ -224,7 +224,7 @@ class ConvTranspose2d(_ConvNd):
         ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
         and there would be an extra dimension at the beginning of the weight's
         shape. Specifically, the shape of weight would be ``(groups,
-        out_channel // groups, in_channels // groups, *kernel_size)``. Default: 1
+        out_channels // groups, in_channels // groups, *kernel_size)``. Default: 1
     :param bias: wether to add a bias onto the result of convolution. Default:
         True
     :param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default:
@@ -306,3 +306,86 @@ class ConvTranspose2d(_ConvNd):
             self.conv_mode,
             self.compute_mode,
         )
+
+
+class LocalConv2d(Conv2d):
+    r"""Applies a spatial convolution with untied kernels over an input 4D tensor.
+    It is also known as the locally connected layer.
+
+    :param in_channels: number of input channels.
+    :param out_channels: number of output channels.
+    :param input_height: the height of the input images.
+    :param input_width: the width of the input images.
+    :param kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is
+        an :class:`int`, the actual kernel size would be
+        ``(kernel_size, kernel_size)``.
+    :param stride: stride of the 2D convolution operation. Default: 1
+    :param padding: size of the paddings added to the input on both sides of its
+        spatial dimensions. Only zero-padding is supported. Default: 0
+    :param dilation: dilation of the 2D convolution operation. Default: 1
+    :param groups: number of groups to divide input and output channels into,
+        so as to perform a "grouped convolution". When ``groups`` is not 1,
+        ``in_channels`` and ``out_channels`` must be divisible by ``groups``.
+        The shape of weight is ``(groups, output_height, output_width,
+        in_channels // groups, *kernel_size, out_channels // groups)``.
+        Default: 1
+    :param conv_mode: convolution mode passed to the operator. Default:
+        `CROSS_CORRELATION`
+    """
+
+    _conv_mode_type = mgb.opr_param_defs.Convolution.Mode
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        input_height: int,
+        input_width: int,
+        kernel_size: Union[int, Tuple[int, int]],
+        stride: Union[int, Tuple[int, int]] = 1,
+        padding: Union[int, Tuple[int, int]] = 0,
+        dilation: Union[int, Tuple[int, int]] = 1,
+        groups: int = 1,
+        conv_mode: str = "CROSS_CORRELATION",
+    ):
+        # Set before super().__init__(): _infer_weight_shape() reads them
+        # (presumably it is called from the parent constructor -- confirm).
+        self.input_height = input_height
+        self.input_width = input_width
+        super().__init__(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias=False,
+            conv_mode=conv_mode,
+        )
+
+    def _infer_weight_shape(self):
+        group = self.groups
+        # NOTE(review): dilation is not part of this output-size formula;
+        # confirm whether the operator accepts dilation != 1.
+        output_height = (
+            self.input_height + self.padding[0] * 2 - self.kernel_size[0]
+        ) // self.stride[0] + 1
+        output_width = (
+            self.input_width + self.padding[1] * 2 - self.kernel_size[1]
+        ) // self.stride[1] + 1
+        # Assume format is NCHW
+        return (
+            group,
+            output_height,
+            output_width,
+            self.in_channels // group,
+            self.kernel_size[0],
+            self.kernel_size[1],
+            self.out_channels // group,
+        )
+
+    def forward(self, inp):
+        return local_conv2d(
+            inp, self.weight, self.stride, self.padding, self.dilation, self.conv_mode
+        )
diff --git a/python_module/test/unit/module/test_conv.py b/python_module/test/unit/module/test_conv.py
index 354224f73a0e6e65974c8042d85fd42c14bae9e3..f67a8aaf86a19d55659fe31386ffc0ca99b40f87 100644
--- a/python_module/test/unit/module/test_conv.py
+++ b/python_module/test/unit/module/test_conv.py
@@ -11,7 +11,7 @@ import itertools
 import numpy as np
 
 from megengine import Parameter, tensor
-from megengine.module import ConvTranspose2d
+from megengine.module import ConvTranspose2d, LocalConv2d
 from megengine.test import assertTensorClose
 
 
@@ -50,3 +50,61 @@ def test_conv_transpose2d():
         y = conv_transpose2d(tensor(inp))
 
         assertTensorClose(out, y.numpy(), max_err=2e-6)
+
+
+def test_local_conv2d():
+    batch_size = 10
+    in_channels = 4
+    out_channels = 8
+    input_height = 8
+    input_width = 8
+    kernel_size = 3
+    stride = 1
+    padding = 1
+    dilation = 1
+    groups = 1
+    local_conv2d = LocalConv2d(
+        in_channels=in_channels,
+        out_channels=out_channels,
+        input_height=input_height,
+        input_width=input_width,
+        kernel_size=kernel_size,
+        stride=stride,
+        padding=padding,
+        dilation=dilation,
+        groups=groups,
+    )
+    inputs = np.random.normal(
+        size=(batch_size, in_channels, input_height, input_width)
+    ).astype(np.float32)
+    output_height = (input_height + padding * 2 - kernel_size) // stride + 1
+    output_width = (input_width + padding * 2 - kernel_size) // stride + 1
+    weights = np.random.normal(
+        size=(
+            groups,
+            output_height,
+            output_width,
+            in_channels // groups,
+            kernel_size,
+            kernel_size,
+            out_channels // groups,
+        )
+    ).astype(np.float32)
+    local_conv2d.weight = Parameter(weights)
+    outputs = local_conv2d(tensor(inputs))
+    # naive reference computed with numpy
+    # only groups == 1 is covered: the reference always reads weights[0]
+    inputs = np.pad(inputs, ((0, 0), (0, 0), (padding, padding), (padding, padding)))
+    expected = np.zeros(
+        (batch_size, out_channels, output_height, output_width), dtype=np.float32,
+    )
+    for n, oc, oh, ow in itertools.product(
+        *map(range, [batch_size, out_channels, output_height, output_width])
+    ):
+        ih, iw = oh * stride, ow * stride
+        expected[n, oc, oh, ow] = np.sum(
+            inputs[n, :, ih : ih + kernel_size, iw : iw + kernel_size]
+            * weights[0, oh, ow, :, :, :, oc]
+        )
+
+    assertTensorClose(outputs.numpy(), expected, max_err=1e-5)
diff --git a/src/opr/impl/dnn/dnn.oprdecl b/src/opr/impl/dnn/dnn.oprdecl
index 0c377295240b7b4ca38d3da06759b7bb5ee94c80..cadc8da0098112f83df5bd88717c36115c6f8c41 100644
--- a/src/opr/impl/dnn/dnn.oprdecl
+++ b/src/opr/impl/dnn/dnn.oprdecl
@@ -112,9 +112,10 @@ decl_opr('GroupLocal',
                      'convolution kernel in '
                      '(group, out row, out col, in channel / group, '
                      'kern row, kern col, out channel / group) format')],
-         params='ConvolutionV0',
+         params=[('param', 'Convolution')],
          desc='batched convolution on groupped channeled 2D images, but '
-         'kernels are not shared across different output positions')
+         'kernels are not shared across different output positions',
+         version=1)
 
 decl_opr('LRN',
          inputs=['src'],