diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/rules.h b/paddle/fluid/distributed/auto_parallel/spmd_rules/rules.h
index 6070539f7bc40b89d1101a4dce0bd77acebcd148..c58333d0fb7011623f28449cdc06b459af029f64 100644
--- a/paddle/fluid/distributed/auto_parallel/spmd_rules/rules.h
+++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/rules.h
@@ -132,7 +132,7 @@ REGISTER_SPMD_RULE(tanh_shrink, ElementwiseSPMDRule);
 REGISTER_SPMD_RULE(thresholded_relu, ElementwiseSPMDRule);
 REGISTER_SPMD_RULE(trunc, ElementwiseSPMDRule);
 
-// matmul rule
+// layer_norm rule
 REGISTER_SPMD_RULE(layer_norm, LayerNormSPMDRule);
 
 // replicated rule
diff --git a/python/paddle/distributed/auto_parallel/static/completion.py b/python/paddle/distributed/auto_parallel/static/completion.py
index d8636153ccf1f72041525134abde3d4420aade17..1f5ccc94c3c3e453cdb4a5a1579dc67fb01e1d3e 100644
--- a/python/paddle/distributed/auto_parallel/static/completion.py
+++ b/python/paddle/distributed/auto_parallel/static/completion.py
@@ -938,7 +938,7 @@ class Completer:
         """Complete annotation for the partial annotated serial_main_program.
         Arguments:
             serial_main_program: partial annotated serial_main_program.
-        Returns:e
+        Returns:
            serial_main_program: completed annotated serial_main_program.
         """
 
diff --git a/python/paddle/distributed/auto_parallel/static/engine.py b/python/paddle/distributed/auto_parallel/static/engine.py
index ee3ab4ea208a716d31c61d2b9e2a7c4474850825..4c0c2e2e4732c9e9498c7b7bfb517f8aa15b51a3 100644
--- a/python/paddle/distributed/auto_parallel/static/engine.py
+++ b/python/paddle/distributed/auto_parallel/static/engine.py
@@ -757,7 +757,7 @@ class Engine:
 
     def _parallel(self, mode, all_ranks=False):
         # Parallelize program based on the planner's results
-        # For now, the completer has to be passed to the planner,
+        # For now, the completer has to be passed to the Parallelizer,
         # because we may use it to complete the annotation of the backward and update.
         parallelizer = Parallelizer(
             mode,
diff --git a/python/paddle/distributed/auto_parallel/static/planner_v2.py b/python/paddle/distributed/auto_parallel/static/planner_v2.py
index f0ac925371055b1cb5e408321a075725f7d31ee3..6e318fb56e15ad5eb968da80638dada0c31c6d80 100755
--- a/python/paddle/distributed/auto_parallel/static/planner_v2.py
+++ b/python/paddle/distributed/auto_parallel/static/planner_v2.py
@@ -40,7 +40,7 @@ class Planner:
     def __init__(self, mode, dist_context):
         self._mode = mode
         self._dist_context = dist_context
-        self._load = False
+        self._load = False  # load dist_attr from file
 
         # NOTE: [HighOrderGrad]. There are grad ops in forward phase, and it need
         # dependency of backward-forward ops in forward completion.