diff --git a/python_module/megengine/module/parampack.py b/python_module/megengine/module/parampack.py index 01b2c55551a66872fd157a20acbb7ab46791c64a..b6025022ac753f27362ae582d35893649fd4612e 100644 --- a/python_module/megengine/module/parampack.py +++ b/python_module/megengine/module/parampack.py @@ -17,7 +17,10 @@ from .module import Module class ParamPack(Module): - r"""Pack module's parameters + r"""Pack module's parameters by gathering their memory into contiguous addresses. + Using (device, dtype, requires_grad) as key, for example ('gpu0', float32, True), + parameters with the same key will be packed together. + It helps a lot for multi-machine training by speeding up the allreduce of gradients. :param model: the module you want to pack parameters. :param nr_ignore_first: how many parameters will be unpacked at first.