diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
index 52468ab5334967b35532711133a380b54706f8de..c4d42f90615fc1251ff19126bf992b38cc02d11b 100755
--- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
@@ -1621,13 +1621,8 @@ class ShardingOptimizer(MetaOptimizerBase):
             persistable=True,
             force_cpu=True)
 
-        cond_var = layers.create_global_var(
-            name="gradient_merge_cond",
-            shape=[1],
-            value=bool(0),
-            dtype='bool',
-            persistable=False,
-            force_cpu=True)
+        cond_var = main_block.create_var(
+            name="gradient_merge_cond", shape=[1], dtype='bool')
 
         with device_guard("cpu"):
             # step_var = (step_var + 1) % k_step
diff --git a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py
index 7668dff36207ed700f5aa6378cb0f5532cfedd3f..accac81133825cd41a826e9efeed847a70ef4a22 100644
--- a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py
+++ b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py
@@ -107,13 +107,8 @@ def _get_gm_cond_var(main_program, k_steps, dist_context):
         force_cpu=True)
     set_var_dist_attr(dist_context, step_var, [-1], world_process_group.ranks)
 
-    cond_var = layers.create_global_var(
-        name="gradient_merge_cond",
-        shape=[1],
-        value=bool(0),
-        dtype='bool',
-        persistable=False,
-        force_cpu=True)
+    cond_var = main_block.create_var(
+        name="gradient_merge_cond", shape=[1], dtype='bool')
     set_var_dist_attr(dist_context, cond_var, [-1], world_process_group.ranks)
 
     with device_guard("cpu"):
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 7bf4608de89c9cb57e7f2309e6165dbb636ce41a..8242d8e3392ec02ebb0f335b099d24eebd9fff06 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -7098,13 +7098,8 @@ class GradientMergeOptimizer(object):
             persistable=True,
             force_cpu=True)
 
-        cond_var = layers.create_global_var(
-            name="gradient_merge_cond",
-            shape=[1],
-            value=bool(0),
-            dtype='bool',
-            persistable=False,
-            force_cpu=True)
+        cond_var = main_block.create_var(
+            name="gradient_merge_cond", shape=[1], dtype='bool')
 
         with device_guard("cpu"):
             # step_var = (step_var + 1) % k_step
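
All three hunks make the same change: `gradient_merge_cond` stops being declared via `layers.create_global_var` (which attaches a `fill_constant` initializer and, in the old call, was oddly marked `persistable=False`) and becomes a plain block-local variable created with `main_block.create_var`. Below is a minimal, self-contained sketch of why that is safe, not the Paddle source itself: the `cond_var` line is taken from the diff, while the program scaffolding and the `gradient_merge_step` / `gradient_merge_zero` names are illustrative stand-ins, assuming Paddle 2.x static-graph APIs.

```python
import paddle
from paddle.fluid import layers

paddle.enable_static()

main_program = paddle.static.Program()
startup_program = paddle.static.Program()
with paddle.static.program_guard(main_program, startup_program):
    main_block = main_program.global_block()

    # Counters must survive across iterations, so they stay persistable
    # global vars with startup-program initializers.
    step_var = paddle.static.create_global_var(
        name="gradient_merge_step", shape=[1], value=0,
        dtype='int32', persistable=True, force_cpu=True)
    zero_var = paddle.static.create_global_var(
        name="gradient_merge_zero", shape=[1], value=0,
        dtype='int32', persistable=True, force_cpu=True)

    # After this diff: a bare block-local variable with no initializer.
    # Safe because the ops below overwrite it on every step before any
    # consumer (e.g. the conditional optimizer block) reads it.
    cond_var = main_block.create_var(
        name="gradient_merge_cond", shape=[1], dtype='bool')

    with paddle.static.device_guard("cpu"):
        # step_var = step_var + 1
        layers.increment(x=step_var, value=1.0, in_place=True)
        # cond_var = (step_var == 0); `equal` writes its result into the
        # variable passed as `cond`, so no prior value is ever needed.
        layers.equal(x=step_var, y=zero_var, cond=cond_var)
```

The apparent rationale: since `cond_var` is recomputed from scratch every step, declaring it as a global var only added a redundant initializer op (and a misleading persistability flag) to the startup path; a plain `create_var` expresses the actual lifetime.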