From 499727015619bd401688e4e0d85cef244c9c09cb Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Tue, 21 Apr 2020 14:32:41 +0800
Subject: [PATCH] docs(mge/module): refine the docstring of several apis

GitOrigin-RevId: ea04e05be44b3db8062bff04e3d5bd49a23c2c31
---
 python_module/megengine/module/activation.py |  6 +++---
 python_module/megengine/module/conv.py       |  2 +-
 python_module/megengine/module/dropout.py    |  4 ++--
 python_module/megengine/module/identity.py   |  2 ++
 python_module/megengine/module/init.py       | 16 ++++++++--------
 5 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/python_module/megengine/module/activation.py b/python_module/megengine/module/activation.py
index a6bba573d..b80c10a79 100644
--- a/python_module/megengine/module/activation.py
+++ b/python_module/megengine/module/activation.py
@@ -191,7 +191,7 @@ class LeakyReLU(Module):
     Applies the element-wise function:

     .. math::
-        \text{LeakyReLU}(x) = \max(0,x) + 0.01 * \min(0,x)
+        \text{LeakyReLU}(x) = \max(0,x) + negative\_slope \times \min(0,x)

     or

@@ -199,7 +199,7 @@ class LeakyReLU(Module):
         \text{LeakyReLU}(x) =
         \begin{cases}
             x, & \text{ if } x \geq 0 \\
-            0.01x, & \text{ otherwise }
+            negative\_slope \times x, & \text{ otherwise }
         \end{cases}

     Examples:
@@ -211,7 +211,7 @@ class LeakyReLU(Module):
         import megengine.module as M

         data = mge.tensor(np.array([-8, -12, 6, 10]).astype(np.float32))
-        leakyrelu = M.LeakyReLU()
+        leakyrelu = M.LeakyReLU(0.01)
         output = leakyrelu(data)
         print(output.numpy())

diff --git a/python_module/megengine/module/conv.py b/python_module/megengine/module/conv.py
index 9029c377c..fbeb50db1 100644
--- a/python_module/megengine/module/conv.py
+++ b/python_module/megengine/module/conv.py
@@ -204,7 +204,7 @@ class ConvTranspose2d(_ConvNd):
     with respect to its input.

     Convolution usually reduces the size of input, while transposed convolution works
-    the other way, transforming a smaller input to a larger output while preserving the
+    the opposite way, transforming a smaller input to a larger output while preserving the
     connectivity pattern.

     :param in_channels: number of input channels.
diff --git a/python_module/megengine/module/dropout.py b/python_module/megengine/module/dropout.py
index 5deb5ea8e..146eba245 100644
--- a/python_module/megengine/module/dropout.py
+++ b/python_module/megengine/module/dropout.py
@@ -11,9 +11,9 @@ from .module import Module


 class Dropout(Module):
-    r"""Randomly set input elements to zeros. Commonly used in large networks to prevent overfitting.
+    r"""Randomly set input elements to zeros with the probability :math:`drop\_prob` during training. Commonly used in large networks to prevent overfitting.
     Note that we perform dropout only during training, we also rescale(multiply) the output tensor
-    by :math:`\frac{1}{1 - p}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`.
+    by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`.

     :param drop_prob: The probability to drop (set to zero) each single element
     """
diff --git a/python_module/megengine/module/identity.py b/python_module/megengine/module/identity.py
index 7d62ae240..51b31e505 100644
--- a/python_module/megengine/module/identity.py
+++ b/python_module/megengine/module/identity.py
@@ -11,5 +11,7 @@ from .module import Module


 class Identity(Module):
+    r"""A placeholder identity operator that will ignore any argument."""
+
     def forward(self, x):
         return identity(x)
diff --git a/python_module/megengine/module/init.py b/python_module/megengine/module/init.py
index 01c0bcb82..8c39443ed 100644
--- a/python_module/megengine/module/init.py
+++ b/python_module/megengine/module/init.py
@@ -176,8 +176,8 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None:
         a = \text{gain} \times \sqrt{\frac{6}{\text{fan_in} + \text{fan_out}}}

     Also known as Glorot initialization. Detailed information can be retrieved from
-    `Understanding the difficulty of training deep feedforward neural networks` -
-    Glorot, X. & Bengio, Y. (2010).
+    `"Understanding the difficulty of training deep feedforward neural networks" `_.
+

     :param tensor: An n-dimentional tensor to be initialized
     :param gain: Scaling factor for :math:`a`.
@@ -196,8 +196,7 @@ def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None:
         \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan_in} + \text{fan_out}}}

     Also known as Glorot initialization. Detailed information can be retrieved from
-    `Understanding the difficulty of training deep feedforward neural networks` -
-    Glorot, X. & Bengio, Y. (2010).
+    `"Understanding the difficulty of training deep feedforward neural networks" `_.

     :param tensor: An n-dimentional tensor to be initialized
     :param gain: Scaling factor for :math:`std`.
@@ -217,8 +216,9 @@ def msra_uniform_(
         \text{bound} = \sqrt{\frac{6}{(1 + a^2) \times \text{fan_in}}}

     Detailed information can be retrieved from
-    `Delving deep into rectifiers: Surpassing human-level performance on ImageNet
-    classification`
+    `"Delving deep into rectifiers: Surpassing human-level performance on ImageNet
+    classification" `_.
+

     :param tensor: An n-dimentional tensor to be initialized
     :param a: Optional parameter for calculating gain for leaky_relu. See
@@ -246,8 +246,8 @@ def msra_normal_(
         \text{std} = \sqrt{\frac{2}{(1 + a^2) \times \text{fan_in}}}

     Detailed information can be retrieved from
-    `Delving deep into rectifiers: Surpassing human-level performance on ImageNet
-    classification`
+    `"Delving deep into rectifiers: Surpassing human-level performance on ImageNet
+    classification" `_.

     :param tensor: An n-dimentional tensor to be initialized
     :param a: Optional parameter for calculating gain for leaky_relu. See
-- 
GitLab
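For reference, below is a minimal usage sketch of the modules whose docstrings this patch refines. It assumes the MegEngine 0.x python_module API that the patch targets (megengine.module and megengine.module.init); the sample data, the Conv2d layer, and its weight attribute are illustrative assumptions and are not part of the commit.

# Usage sketch for the modules documented above (assumes the MegEngine 0.x
# "python_module" API; sample values and the Conv2d layer are illustrative).
import numpy as np

import megengine as mge
import megengine.module as M
from megengine.module import init

# LeakyReLU(negative_slope): max(0, x) + negative_slope * min(0, x),
# matching the refined formula in activation.py.
data = mge.tensor(np.array([-8, -12, 6, 10]).astype(np.float32))
leakyrelu = M.LeakyReLU(0.01)
print(leakyrelu(data).numpy())  # expected: [-0.08 -0.12  6.  10.]

# Dropout(drop_prob) zeroes elements with probability drop_prob during
# training and rescales the rest by 1 / (1 - drop_prob); at inference it
# behaves like Identity, as the refined dropout.py docstring states.
dropout = M.Dropout(drop_prob=0.2)
identity = M.Identity()  # placeholder module documented in identity.py

# The init helpers documented in init.py fill an existing tensor in place;
# the Conv2d weight here is just a convenient tensor to initialize.
conv = M.Conv2d(in_channels=3, out_channels=8, kernel_size=3)
init.xavier_uniform_(conv.weight, gain=1.0)  # Glorot uniform
init.msra_normal_(conv.weight)               # He (MSRA) normal

All four init helpers return None and modify their argument in place, which is why they are applied to an existing parameter rather than used to construct a new tensor.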