diff --git a/python_module/megengine/module/batchnorm.py b/python_module/megengine/module/batchnorm.py index bae2533e6dff398d0e92e1fd7b469f7b1d1556fd..127dd77a0d45ea378a828c14d5ba6d8cb01766fe 100644 --- a/python_module/megengine/module/batchnorm.py +++ b/python_module/megengine/module/batchnorm.py @@ -136,7 +136,7 @@ class BatchNorm2d(_BatchNorm): This :attr:`momentum` argument is different from one used in optimizer classes and the conventional notion of momentum. Mathematically, the update rule for running statistics here is - :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`, + :math:`\hat{x}_\text{new} = \text{momentum} \times \hat{x} + (1 - \text{momentum}) \times x_t`, where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the new observed value.