提交 76ce81e8 编写于 作者: M Megvii Engine Team

fix(mge): fix F.nn.dropout train and inference bugs

GitOrigin-RevId: 9d9f246d7b759ae39a130742b52b10d3150ca5cc
上级 5431929e
...@@ -13,7 +13,14 @@ from typing import NamedTuple, Optional, Sequence, Tuple, Union ...@@ -13,7 +13,14 @@ from typing import NamedTuple, Optional, Sequence, Tuple, Union
from ..core._imperative_rt.core2 import apply, dtype_promotion from ..core._imperative_rt.core2 import apply, dtype_promotion
from ..core._imperative_rt.ops import SubgraphBuilder as _SubgraphBuilder from ..core._imperative_rt.ops import SubgraphBuilder as _SubgraphBuilder
from ..core.ops import builtin from ..core.ops import builtin
from ..core.ops.builtin import BatchNorm, Elemwise, GetVarShape, Reduce, TypeCvt from ..core.ops.builtin import (
BatchNorm,
Elemwise,
GetVarShape,
Identity,
Reduce,
TypeCvt,
)
from ..core.ops.special import Const from ..core.ops.special import Const
from ..core.tensor import amp, megbrain_graph from ..core.tensor import amp, megbrain_graph
from ..core.tensor.array_method import _elwise_apply from ..core.tensor.array_method import _elwise_apply
...@@ -1403,9 +1410,14 @@ def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor: ...@@ -1403,9 +1410,14 @@ def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor:
from megengine import tensor from megengine import tensor
import megengine.functional as F import megengine.functional as F
x = tensor(np.ones(10, dtype=np.float32)) # test training mode
out = F.dropout(x, 1./3.) data = tensor(np.ones(10000000, dtype=np.float32))
print(out.numpy()) out = F.nn.dropout(data, 1.0 / 3.0, training=True)
assert not out.numpy().all()
# test eval mode
out = F.nn.dropout(data, 1.0 / 3.0, training=False)
assert out.numpy().all()
Outputs: Outputs:
...@@ -1416,14 +1428,16 @@ def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor: ...@@ -1416,14 +1428,16 @@ def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor:
""" """
assert 0 <= drop_prob < 1 assert 0 <= drop_prob < 1
if drop_prob == 0: if not training or drop_prob == 0:
return inp return inp
# model in training mode, e.g. model.train()
rv = uniform(size=inp.shape) rv = uniform(size=inp.shape)
mask = rv > drop_prob mask = rv > drop_prob
inp *= mask.astype(inp.dtype) ret = inp * mask.astype(inp.dtype)
if training: ret *= 1 / (1 - drop_prob)
inp *= 1 / (1 - drop_prob)
return inp return ret
def one_hot(inp: Tensor, num_classes: int) -> Tensor: def one_hot(inp: Tensor, num_classes: int) -> Tensor:
......
...@@ -57,10 +57,14 @@ def test_where(): ...@@ -57,10 +57,14 @@ def test_where():
def test_dropout():
    """Dropout must zero elements and rescale in training mode, and be a no-op in eval mode.

    Uses a large all-ones input so the probability that *no* element is
    dropped at p = 1/3 is negligible, making the training-mode assertion
    effectively deterministic despite the random mask.
    """
    # test training mode
    data = tensor(np.ones(10000000, dtype=np.float32))
    out = F.nn.dropout(data, 1.0 / 3.0, training=True)
    # With drop_prob = 1/3 some elements must have been zeroed.
    assert not out.numpy().all()
    # The commit's fix: surviving elements are rescaled by 1/(1-p) = 1.5
    # so the expected value of the output matches the input.
    survived = out.numpy()[out.numpy() != 0]
    np.testing.assert_allclose(survived, 1.5, rtol=1e-6)

    # test eval mode: dropout must return the input unchanged (identity),
    # not merely "contain no zeros".
    out = F.nn.dropout(data, 1.0 / 3.0, training=False)
    assert out.numpy().all()
    np.testing.assert_equal(out.numpy(), data.numpy())
def test_matinv(): def test_matinv():
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册