未验证 提交 f172b02f 编写于 作者: L Leo Chen 提交者: GitHub

polish some code (#55583)

上级 b0193f3a
......@@ -132,7 +132,7 @@ REGISTER_SPMD_RULE(tanh_shrink, ElementwiseSPMDRule);
REGISTER_SPMD_RULE(thresholded_relu, ElementwiseSPMDRule);
REGISTER_SPMD_RULE(trunc, ElementwiseSPMDRule);
// matmul rule
// layer_norm rule
REGISTER_SPMD_RULE(layer_norm, LayerNormSPMDRule);
// replicated rule
......
......@@ -938,7 +938,7 @@ class Completer:
"""Complete annotation for the partial annotated serial_main_program.
Arguments:
serial_main_program: partial annotated serial_main_program.
Returns:e
Returns:
serial_main_program: completed annotated serial_main_program.
"""
......
......@@ -757,7 +757,7 @@ class Engine:
def _parallel(self, mode, all_ranks=False):
# Parallelize program based on the planner's results
# For now, the completer has to be passed to the planner,
# For now, the completer has to be passed to the Parallelizer,
# because we may use it to complete the annotation of the backward and update.
parallelizer = Parallelizer(
mode,
......
......@@ -40,7 +40,7 @@ class Planner:
def __init__(self, mode, dist_context):
self._mode = mode
self._dist_context = dist_context
self._load = False
self._load = False # load dist_attr from file
# NOTE: [HighOrderGrad]. There are grad ops in the forward phase, and it needs the
# dependency of backward-forward ops in forward completion.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册