From 499727015619bd401688e4e0d85cef244c9c09cb Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Tue, 21 Apr 2020 14:32:41 +0800
Subject: [PATCH] docs(mge/module): refine the docstring of several apis

GitOrigin-RevId: ea04e05be44b3db8062bff04e3d5bd49a23c2c31
---
 python_module/megengine/module/activation.py |  6 +++---
 python_module/megengine/module/conv.py       |  2 +-
 python_module/megengine/module/dropout.py    |  4 ++--
 python_module/megengine/module/identity.py   |  2 ++
 python_module/megengine/module/init.py       | 16 ++++++++--------
 5 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/python_module/megengine/module/activation.py b/python_module/megengine/module/activation.py
index a6bba573d..b80c10a79 100644
--- a/python_module/megengine/module/activation.py
+++ b/python_module/megengine/module/activation.py
@@ -191,7 +191,7 @@ class LeakyReLU(Module):
     Applies the element-wise function:

     .. math::
-        \text{LeakyReLU}(x) = \max(0,x) + 0.01 * \min(0,x)
+        \text{LeakyReLU}(x) = \max(0,x) + negative\_slope \times \min(0,x)

     or

@@ -199,7 +199,7 @@ class LeakyReLU(Module):
         \text{LeakyReLU}(x) =
         \begin{cases}
             x, & \text{ if } x \geq 0 \\
-            0.01x, & \text{ otherwise }
+            negative\_slope \times x, & \text{ otherwise }
         \end{cases}

     Examples:
@@ -211,7 +211,7 @@ class LeakyReLU(Module):
         import megengine.module as M

         data = mge.tensor(np.array([-8, -12, 6, 10]).astype(np.float32))
-        leakyrelu = M.LeakyReLU()
+        leakyrelu = M.LeakyReLU(0.01)
         output = leakyrelu(data)
         print(output.numpy())

diff --git a/python_module/megengine/module/conv.py b/python_module/megengine/module/conv.py
index 9029c377c..fbeb50db1 100644
--- a/python_module/megengine/module/conv.py
+++ b/python_module/megengine/module/conv.py
@@ -204,7 +204,7 @@ class ConvTranspose2d(_ConvNd):
     with respect to its input.

     Convolution usually reduces the size of input, while transposed convolution works
-    the other way, transforming a smaller input to a larger output while preserving the
+    the opposite way, transforming a smaller input to a larger output while preserving the
     connectivity pattern.

     :param in_channels: number of input channels.
diff --git a/python_module/megengine/module/dropout.py b/python_module/megengine/module/dropout.py
index 5deb5ea8e..146eba245 100644
--- a/python_module/megengine/module/dropout.py
+++ b/python_module/megengine/module/dropout.py
@@ -11,9 +11,9 @@ from .module import Module


 class Dropout(Module):
-    r"""Randomly set input elements to zeros. Commonly used in large networks to prevent overfitting.
+    r"""Randomly set input elements to zeros with the probability :math:`drop\_prob` during training. Commonly used in large networks to prevent overfitting.
     Note that we perform dropout only during training, we also rescale(multiply) the output tensor
-    by :math:`\frac{1}{1 - p}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`.
+    by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`.

     :param drop_prob: The probability to drop (set to zero) each single element
     """
diff --git a/python_module/megengine/module/identity.py b/python_module/megengine/module/identity.py
index 7d62ae240..51b31e505 100644
--- a/python_module/megengine/module/identity.py
+++ b/python_module/megengine/module/identity.py
@@ -11,5 +11,7 @@ from .module import Module


 class Identity(Module):
+    r"""A placeholder identity operator that will ignore any argument."""
+
     def forward(self, x):
         return identity(x)
diff --git a/python_module/megengine/module/init.py b/python_module/megengine/module/init.py
index 01c0bcb82..8c39443ed 100644
--- a/python_module/megengine/module/init.py
+++ b/python_module/megengine/module/init.py
@@ -176,8 +176,8 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None:
         a = \text{gain} \times \sqrt{\frac{6}{\text{fan_in} + \text{fan_out}}}

     Also known as Glorot initialization. Detailed information can be retrieved from
-    `Understanding the difficulty of training deep feedforward neural networks` -
-    Glorot, X. & Bengio, Y. (2010).
+    `"Understanding the difficulty of training deep feedforward neural networks" `_.
+

     :param tensor: An n-dimentional tensor to be initialized
     :param gain: Scaling factor for :math:`a`.
@@ -196,8 +196,7 @@ def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None:
         \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan_in} + \text{fan_out}}}

     Also known as Glorot initialization. Detailed information can be retrieved from
-    `Understanding the difficulty of training deep feedforward neural networks` -
-    Glorot, X. & Bengio, Y. (2010).
+    `"Understanding the difficulty of training deep feedforward neural networks" `_.

     :param tensor: An n-dimentional tensor to be initialized
     :param gain: Scaling factor for :math:`std`.
@@ -217,8 +216,9 @@ def msra_uniform_(
         \text{bound} = \sqrt{\frac{6}{(1 + a^2) \times \text{fan_in}}}

     Detailed information can be retrieved from
-    `Delving deep into rectifiers: Surpassing human-level performance on ImageNet
-    classification`
+    `"Delving deep into rectifiers: Surpassing human-level performance on ImageNet
+    classification" `_.
+

     :param tensor: An n-dimentional tensor to be initialized
     :param a: Optional parameter for calculating gain for leaky_relu. See
@@ -246,8 +246,8 @@ def msra_normal_(
         \text{std} = \sqrt{\frac{2}{(1 + a^2) \times \text{fan_in}}}

     Detailed information can be retrieved from
-    `Delving deep into rectifiers: Surpassing human-level performance on ImageNet
-    classification`
+    `"Delving deep into rectifiers: Surpassing human-level performance on ImageNet
+    classification" `_.

     :param tensor: An n-dimentional tensor to be initialized
     :param a: Optional parameter for calculating gain for leaky_relu. See
-- 
GitLab
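For reference, below is a minimal usage sketch of the modules whose docstrings this patch refines. It assumes the MegEngine 0.x python_module API that the patch targets (megengine.module and megengine.module.init); the sample data, the Conv2d layer, and its weight attribute are illustrative assumptions and are not part of the commit.

# Usage sketch for the modules documented above (assumes the MegEngine 0.x
# "python_module" API; sample values and the Conv2d layer are illustrative).
import numpy as np

import megengine as mge
import megengine.module as M
from megengine.module import init

# LeakyReLU(negative_slope): max(0, x) + negative_slope * min(0, x),
# matching the refined formula in activation.py.
data = mge.tensor(np.array([-8, -12, 6, 10]).astype(np.float32))
leakyrelu = M.LeakyReLU(0.01)
print(leakyrelu(data).numpy())  # expected: [-0.08 -0.12  6.  10.]

# Dropout(drop_prob) zeroes elements with probability drop_prob during
# training and rescales the rest by 1 / (1 - drop_prob); at inference it
# behaves like Identity, as the refined dropout.py docstring states.
dropout = M.Dropout(drop_prob=0.2)
identity = M.Identity()  # placeholder module documented in identity.py

# The init helpers documented in init.py fill an existing tensor in place;
# the Conv2d weight here is just a convenient tensor to initialize.
conv = M.Conv2d(in_channels=3, out_channels=8, kernel_size=3)
init.xavier_uniform_(conv.weight, gain=1.0)  # Glorot uniform
init.msra_normal_(conv.weight)               # He (MSRA) normal

All four init helpers return None and modify their argument in place, which is why they are applied to an existing parameter rather than used to construct a new tensor.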