diff --git a/python_module/megengine/optimizer/adam.py b/python_module/megengine/optimizer/adam.py
index 587ec2f72b914b5879004df7cc31f716950b8604..6f264d3bc094ebf63103d31c5dae32f9b122bfb3 100644
--- a/python_module/megengine/optimizer/adam.py
+++ b/python_module/megengine/optimizer/adam.py
@@ -14,7 +14,7 @@ from .optimizer import Optimizer
 
 
 class Adam(Optimizer):
-    r"""Implements Adam algorithm.
+    r"""Implements Adam algorithm proposed in `"Adam: A Method for Stochastic Optimization" <https://arxiv.org/abs/1412.6980>`_.
 
     :param params: iterable of parameters to optimize or dicts defining
         parameter groups.
diff --git a/python_module/megengine/optimizer/internal.py b/python_module/megengine/optimizer/internal.py
index 7e99b8f4d9e6bb1d7bb60eebcb713e0d3936fe0b..0483af9a1b5e985e160e9ffe114e42041e31f24d 100644
--- a/python_module/megengine/optimizer/internal.py
+++ b/python_module/megengine/optimizer/internal.py
@@ -21,7 +21,7 @@ def add_update_fastpath(
     beta: Union[Tensor, float, int] = 1.0,
     bias: Union[Tensor, float, int] = 0.0
 ):
-    """a fast-path ONLY used to update parameters in optimzier, since it
+    """a fast-path ONLY used to update parameters in optimizer, since it
     would bypass computing graph and launch dnn/add_update kernel directly,
     it is more efficient than functional/add_update.
     """
diff --git a/python_module/megengine/optimizer/lr_scheduler.py b/python_module/megengine/optimizer/lr_scheduler.py
index 677607c8559b792ef14d1c3c387209419859555f..7cdb6d9bb50d26a951dd446500326d962226bf6b 100644
--- a/python_module/megengine/optimizer/lr_scheduler.py
+++ b/python_module/megengine/optimizer/lr_scheduler.py
@@ -4,7 +4,7 @@ from .optimizer import Optimizer
 
 
 class LRScheduler(metaclass=ABCMeta):
-    r"""Base class for all lr_schedulers.
+    r"""Base class for all learning rate based schedulers.
 
     :param optimizer: Wrapped optimizer.
     :param current_epoch: The index of current epoch. Default: -1
diff --git a/python_module/megengine/optimizer/sgd.py b/python_module/megengine/optimizer/sgd.py
index 0a24c20afe7f376737a2a374a26ebc6355cc45df..a1f807b368198b49978de3081448533fe9e689a5 100644
--- a/python_module/megengine/optimizer/sgd.py
+++ b/python_module/megengine/optimizer/sgd.py
@@ -17,7 +17,7 @@ class SGD(Optimizer):
     r"""Implements stochastic gradient descent.
 
     Nesterov momentum is based on the formula from
-    `On the importance of initialization and momentum in deep learning`.
+    `"On the importance of initialization and momentum in deep learning" <http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf>`_ .
 
     :param params: iterable of parameters to optimize or dicts defining
         parameter groups.