diff --git a/python_module/megengine/module/batchnorm.py b/python_module/megengine/module/batchnorm.py
index bae2533e6dff398d0e92e1fd7b469f7b1d1556fd..127dd77a0d45ea378a828c14d5ba6d8cb01766fe 100644
--- a/python_module/megengine/module/batchnorm.py
+++ b/python_module/megengine/module/batchnorm.py
@@ -136,7 +136,7 @@ class BatchNorm2d(_BatchNorm):
         This :attr:`momentum` argument is different from one used in optimizer
         classes and the conventional notion of momentum. Mathematically, the
         update rule for running statistics here is
-        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
+        :math:`\hat{x}_\text{new} = \text{momentum} \times \hat{x} + (1 - \text{momentum}) \times x_t`,
         where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
         new observed value.