diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py
index e20e988f9162aa95bf9774f680591ba5018f1a4b..a2f6ba3d932c9a0a81131e27b2e755f198674238 100755
--- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py
@@ -24,10 +24,8 @@ from paddle.fluid.dygraph import base as imperative_base
 
 from ...utils.log_util import logger
 
-g_shard_use_reduce = int(os.environ.get("FLAGS_shard_use_reduce", 0))
-logger.info(f"g_shard_use_reduce {g_shard_use_reduce}")
-g_shard_norm_align_dp = int(os.environ.get("FLAGS_shard_norm_align_dp", 1))
-logger.info(f"g_shard_norm_align_dp {g_shard_norm_align_dp}")
+g_shard_use_reduce = int(os.environ.get("FLAGS_shard_use_reduce", 1))
+g_shard_norm_align_dp = int(os.environ.get("FLAGS_shard_norm_align_dp", 0))
 
 if g_shard_norm_align_dp:
     assert (
diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py
index 5fc568d4f8e6cf013c4a05b4e7e282b79ffaa9ed..a903d8bdaa572416426771949c5a80e1774c5120 100755
--- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py
@@ -41,8 +41,7 @@ from ...utils.mix_precision_utils import MixPrecisionOptimizer
 
 __all__ = []
 
-g_shard_norm_align_dp = int(os.environ.get("FLAGS_shard_norm_align_dp", 1))
-logger.info(f"g_shard_norm_align_dp {g_shard_norm_align_dp}")
+g_shard_norm_align_dp = int(os.environ.get("FLAGS_shard_norm_align_dp", 0))
 
 
 class HybridParallelClipGrad:
diff --git a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py
index 12c8055e7bd23efcfb171090632cc3975e126b29..3710a9014c4129e2a1eb9cb47c5f50b553ba77ad 100644
--- a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py
+++ b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py
@@ -31,8 +31,7 @@ from .pp_utils.utils import HOOK_ACTION, FusedCommBuffer, assign_group_by_size
 
 __all__ = []
 
-g_shard_use_reduce = int(os.environ.get("FLAGS_shard_use_reduce", 0))
-logger.info(f"g_shard_use_reduce {g_shard_use_reduce}")
+g_shard_use_reduce = int(os.environ.get("FLAGS_shard_use_reduce", 1))
 
 
 # assume only the first stage and last stage need data, and data consumption are ordred;
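
The patch flips the shipped defaults (FLAGS_shard_use_reduce: 0 -> 1, FLAGS_shard_norm_align_dp: 1 -> 0) and drops the startup logger.info lines; the flags themselves still come from the environment. A minimal sketch (not part of the patch) of how the new defaults resolve and how to pin the old behavior explicitly:

    # Illustration only: mirrors the os.environ.get(...) pattern used in the patch.
    import os

    # Unset variables now fall back to the new defaults.
    g_shard_use_reduce = int(os.environ.get("FLAGS_shard_use_reduce", 1))       # 1 unless overridden
    g_shard_norm_align_dp = int(os.environ.get("FLAGS_shard_norm_align_dp", 0))  # 0 unless overridden

    # To keep the pre-patch behavior, export the old values before launching, e.g.:
    #   export FLAGS_shard_use_reduce=0
    #   export FLAGS_shard_norm_align_dp=1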