diff --git a/python_module/megengine/module/parampack.py b/python_module/megengine/module/parampack.py
index 01b2c55551a66872fd157a20acbb7ab46791c64a..b6025022ac753f27362ae582d35893649fd4612e 100644
--- a/python_module/megengine/module/parampack.py
+++ b/python_module/megengine/module/parampack.py
@@ -17,7 +17,10 @@ from .module import Module
 
 
 class ParamPack(Module):
-    r"""Pack module's parameters
+    r"""Pack module's parameters by gathering their memory into a contiguous address range.
+    Parameters are grouped by the key (device, dtype, requires_grad), for example
+    ('gpu0', float32, True); parameters with the same key are packed together.
+    This helps multi-machine training considerably by speeding up the allreduce of gradients.
 
     :param model: the module you want to pack parameters.
     :param nr_ignore_first: how many parameters will be unpacked at first.