From 3f3a256e0f59f5e231e75760f340490c22ff837a Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Mon, 26 Jul 2021 20:44:38 +0800
Subject: [PATCH] fix(mge/functional): fix conv* dtype promotion

GitOrigin-RevId: 3f03790cfc2ecf2f2c05e1ea5a68be0bc0e84bb2
---
 imperative/python/megengine/functional/nn.py  | 46 +++++++++++++++++--
 imperative/python/megengine/module/conv.py    |  6 +--
 .../python/test/unit/module/test_conv.py      | 31 +++++++++++++
 3 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/imperative/python/megengine/functional/nn.py b/imperative/python/megengine/functional/nn.py
index 79ca60a05..b87ded3ed 100644
--- a/imperative/python/megengine/functional/nn.py
+++ b/imperative/python/megengine/functional/nn.py
@@ -9,7 +9,7 @@
 # pylint: disable=too-many-lines
 from typing import Optional, Sequence, Tuple, Union
 
-from ..core._imperative_rt.core2 import apply
+from ..core._imperative_rt.core2 import apply, dtype_promotion
 from ..core.ops import builtin
 from ..core.ops.builtin import BatchNorm, Elemwise
 from ..core.ops.special import Const
@@ -157,6 +157,12 @@ def conv1d(
     if amp._enabled:
         compute_mode = "float32"
         inp, weight, bias = cast_tensors(inp, weight, bias)
+    else:
+        dtype = dtype_promotion(inp, weight)
+        if inp.dtype != dtype:
+            inp = inp.astype(dtype)
+        if weight.dtype != dtype:
+            weight = weight.astype(dtype)
 
     inp = expand_dims(inp, 3)
     weight = expand_dims(weight, 3)
@@ -211,7 +217,7 @@ def conv2d(
     :param padding: size of the paddings added to the input on both sides of its
         spatial dimensions. Only zero-padding is supported. Default: 0
     :param dilation: dilation of the 2D convolution operation. Default: 1
-    :param groups: number of groups into which the input and output channels are divided, 
+    :param groups: number of groups into which the input and output channels are divided,
         so as to perform a ``grouped convolution``. When ``groups`` is not 1,
         ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
         and the shape of weight should be ``(groups, out_channel // groups,
@@ -234,6 +240,12 @@ def conv2d(
     if amp._enabled:
         compute_mode = "float32"
         inp, weight, bias = cast_tensors(inp, weight, bias)
+    else:
+        dtype = dtype_promotion(inp, weight)
+        if inp.dtype != dtype:
+            inp = inp.astype(dtype)
+        if weight.dtype != dtype:
+            weight = weight.astype(dtype)
 
     stride_h, stride_w = expand_hw(stride)
     pad_h, pad_w = expand_hw(padding)
@@ -297,6 +309,12 @@ def conv3d(
     stride = _triple_nonzero(stride)
     dilate = _triple_nonzero(dilation)
 
+    dtype = dtype_promotion(inp, weight)
+    if inp.dtype != dtype:
+        inp = inp.astype(dtype)
+    if weight.dtype != dtype:
+        weight = weight.astype(dtype)
+
     sparse_type = "dense" if groups == 1 else "group"
     op = builtin.Convolution3D(
         pad_d=pad[D],
@@ -341,7 +359,7 @@ def conv_transpose2d(
     :param padding: size of the paddings added to the input on both sides of its
         spatial dimensions. Only zero-padding is supported. Default: 0
     :param dilation: dilation of the 2D convolution operation. Default: 1
-    :param groups: number of groups into which the input and output channels are divided, 
+    :param groups: number of groups into which the input and output channels are divided,
         so as to perform a ``grouped convolution``. When ``groups`` is not 1,
         ``in_channels`` and ``out_channels`` must be divisible by groups,
         and the shape of weight should be ``(groups, in_channels // groups,
@@ -364,6 +382,12 @@ def conv_transpose2d(
     if amp._enabled:
         compute_mode = "float32"
         inp, weight, bias = cast_tensors(inp, weight, bias)
+    else:
+        dtype = dtype_promotion(inp, weight)
+        if inp.dtype != dtype:
+            inp = inp.astype(dtype)
+        if weight.dtype != dtype:
+            weight = weight.astype(dtype)
 
     if groups != 1:
         raise NotImplementedError("group transposed conv2d is not supported yet.")
@@ -413,7 +437,7 @@ def deformable_conv2d(
     :param padding: size of the paddings added to the input on both sides of its
         spatial dimensions. Only zero-padding is supported. Default: 0
     :param dilation: dilation of the 2D convolution operation. Default: 1
-    :param groups: number of groups into which the input and output channels are divided, 
+    :param groups: number of groups into which the input and output channels are divided,
         so as to perform a ``grouped convolution``. When ``groups`` is not 1,
         ``in_channels`` and ``out_channels`` must be divisible by groups,
         and the shape of weight should be ``(groups, out_channel // groups,
@@ -482,6 +506,12 @@ def local_conv2d(
     pad_h, pad_w = expand_hw(padding)
     dilate_h, dilate_w = expand_hw(dilation)
 
+    dtype = dtype_promotion(inp, weight)
+    if inp.dtype != dtype:
+        inp = inp.astype(dtype)
+    if weight.dtype != dtype:
+        weight = weight.astype(dtype)
+
     op = builtin.GroupLocal(
         stride_h=stride_h,
         stride_w=stride_w,
@@ -507,7 +537,7 @@ def conv_transpose3d(
     dilation: Union[int, Tuple[int, int, int]] = 1,
 ) -> Tensor:
     """
-    3D transposed convolution operation. Only support the case that groups = 1 
+    3D transposed convolution operation. Only support the case that groups = 1
     and conv_mode = "cross_correlation".
 
     Refer to :class:`~.ConvTranspose3d` for more information.
@@ -527,6 +557,12 @@ def conv_transpose3d(
     stride = _triple_nonzero(stride)
     dilate = _triple_nonzero(dilation)
 
+    dtype = dtype_promotion(inp, weight)
+    if inp.dtype != dtype:
+        inp = inp.astype(dtype)
+    if weight.dtype != dtype:
+        weight = weight.astype(dtype)
+
     op = builtin.Convolution3DBackwardData(
         pad_d=pad[D],
         pad_h=pad[H],
diff --git a/imperative/python/megengine/module/conv.py b/imperative/python/megengine/module/conv.py
index c292cfc42..861c92698 100644
--- a/imperative/python/megengine/module/conv.py
+++ b/imperative/python/megengine/module/conv.py
@@ -269,11 +269,11 @@ class Conv2d(_ConvNd):
     output: :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})` where
 
     .. math::
-        \text{H}_{out} = \lfloor \frac{\text{H}_{in} + 2 * \text{padding[0]} - 
+        \text{H}_{out} = \lfloor \frac{\text{H}_{in} + 2 * \text{padding[0]} -
             \text{dilation[0]} * (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1 \rfloor
 
     .. math::
-        \text{W}_{out} = \lfloor \frac{\text{W}_{in} + 2 * \text{padding[1]} - 
+        \text{W}_{out} = \lfloor \frac{\text{W}_{in} + 2 * \text{padding[1]} -
             \text{dilation[1]} * (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1 \rfloor
 
     When `groups == in_channels` and `out_channels == K * in_channels`,
@@ -939,7 +939,7 @@ class ConvTranspose3d(_ConvNd):
         ichl = self.in_channels
         ochl = self.out_channels
         kt, kh, kw = self.kernel_size
-        return (ochl, ichl, kt, kh, kw)
+        return (ichl, ochl, kt, kh, kw)
 
     def _infer_bias_shape(self):
         # Assume format is NCTHW
diff --git a/imperative/python/test/unit/module/test_conv.py b/imperative/python/test/unit/module/test_conv.py
index b36c2496b..f2143ae18 100644
--- a/imperative/python/test/unit/module/test_conv.py
+++ b/imperative/python/test/unit/module/test_conv.py
@@ -9,11 +9,41 @@
 import itertools
 
 import numpy as np
+import pytest
 
+import megengine.module as M
 from megengine import Parameter, tensor
+from megengine.functional.debug_param import (
+    get_execution_strategy,
+    set_execution_strategy,
+)
 from megengine.module import ConvTranspose2d, ConvTranspose3d, LocalConv2d
 
 
+@pytest.fixture
+def reproducible():
+    old = get_execution_strategy()
+    set_execution_strategy("HEURISTIC_REPRODUCIBLE")
+    yield
+    set_execution_strategy(old)
+
+
+# NOTE: test in module for convenience. should really test in functional
+@pytest.mark.parametrize(
+    "name",
+    ["Conv1d", "Conv2d", "Conv3d", "ConvTranspose2d", "ConvTranspose3d", "LocalConv2d"],
+)
+def test_conv_dtype_promotion(name, reproducible):
+    N, Ci, Co, K = 2, 16, 32, 3
+    S = (7,) * int(name[-2])
+    if "Local" in name:
+        m = getattr(M, name)(Ci, Co, *S, K)
+    else:
+        m = getattr(M, name)(Ci, Co, K)
+    x = tensor(np.random.random(size=(N, Ci) + S).astype("float16"))
+    np.testing.assert_equal(m(x).numpy(), m(x.astype("float32")).numpy())
+
+
 def test_conv_transpose2d():
     SH, SW = 3, 1
     PH, PW = 2, 0
@@ -163,6 +193,7 @@ def test_conv_transpose3d():
     )
     out_np = out_np[:, :, PD : OD - PD, PH : OH - PH, PW : OW - PW]
 
+    assert conv_transpose3d.weight.numpy().shape == weight.shape
    conv_transpose3d.weight = Parameter(weight)
    out_meg = conv_transpose3d.forward(tensor(inp))
--
GitLab
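
A minimal usage sketch of the behavior this patch enables, modeled on the new
test_conv_dtype_promotion test above. It assumes a MegEngine build that
already contains this fix, and that the resulting dtype follows the
numpy-style promotion rules dtype_promotion implements (float16 with float32
promotes to float32); the shapes and values are illustrative only.

    import numpy as np

    import megengine.functional as F
    from megengine import tensor

    # float16 input convolved with a float32 weight: previously the
    # mismatched dtypes were passed through without promotion; with this
    # fix, conv2d first casts both operands to their common dtype.
    inp = tensor(np.random.random((2, 16, 7, 7)).astype("float16"))
    weight = tensor(np.random.random((32, 16, 3, 3)).astype("float32"))

    out = F.conv2d(inp, weight)
    print(out.dtype)  # expected float32, same as casting inp up front

Note that the promotion only runs on the non-AMP path: when amp._enabled is
true, cast_tensors already coerces inp, weight, and bias, so the new else
branch is skipped.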