diff --git a/python/paddle/distributed/passes/ps_server_pass.py b/python/paddle/distributed/passes/ps_server_pass.py
index c243c0602ff39e6a1abca320eb7b1e62e0b0c0bd..3cc1a14dbcd911c38991cd43481f2114911cbb09 100755
--- a/python/paddle/distributed/passes/ps_server_pass.py
+++ b/python/paddle/distributed/passes/ps_server_pass.py
@@ -69,7 +69,7 @@ class AddLrDecayTablePass(PassBase):
         ] = tensor_table_class
         attrs['tensor_table'] = tensor_table_dict
-    def _get_lr_sheduler_program(self, lr_sheduler, lr_decay_steps):
+    def _get_lr_scheduler_program(self, lr_scheduler, lr_decay_steps):
         schedler_decay = [
             'NoamDecay',
             'NaturalExpDecay',
@@ -81,12 +81,12 @@ class AddLrDecayTablePass(PassBase):
         decay_startup_program = paddle.static.Program()
         lr_name = ""
-        if isinstance(lr_sheduler, ExponentialDecay):
+        if isinstance(lr_scheduler, ExponentialDecay):
             with paddle.static.program_guard(
                 decay_main_program, decay_startup_program
             ):
                 lr = exponential_decay(
-                    1.0, lr_decay_steps, lr_sheduler.gamma, True
+                    1.0, lr_decay_steps, lr_scheduler.gamma, True
                 )
                 lr_name = lr.name
                 logging.warn(
@@ -96,24 +96,24 @@ class AddLrDecayTablePass(PassBase):
                     "\t strategy.a_sync_configs= { 'lr_decay_steps' : YOUR_DECAY_STEP } \n"
                     % lr_decay_steps
                 )
-        elif isinstance(lr_sheduler, NoamDecay):
+        elif isinstance(lr_scheduler, NoamDecay):
             with paddle.static.program_guard(
                 decay_main_program, decay_startup_program
             ):
                 lr = noam_decay(
-                    lr_sheduler.d_model, lr_sheduler.warmup_steps, 1.0
+                    lr_scheduler.d_model, lr_scheduler.warmup_steps, 1.0
                 )
                 lr_name = lr.name
                 logging.warn(
                     "NoamDecay is set, warmup steps is [ %d ]"
-                    % lr_sheduler.warmup_steps
+                    % lr_scheduler.warmup_steps
                 )
-        elif isinstance(lr_sheduler, NaturalExpDecay):
+        elif isinstance(lr_scheduler, NaturalExpDecay):
             with paddle.static.program_guard(
                 decay_main_program, decay_startup_program
             ):
                 lr = natural_exp_decay(
-                    1.0, lr_decay_steps, lr_sheduler.gamma, True
+                    1.0, lr_decay_steps, lr_scheduler.gamma, True
                 )
                 lr_name = lr.name
                 logging.warn(
@@ -123,12 +123,12 @@ class AddLrDecayTablePass(PassBase):
                     "\t strategy.a_sync_configs= { 'lr_decay_steps' : YOUR_DECAY_STEP } \n"
                     % lr_decay_steps
                 )
-        elif isinstance(lr_sheduler, InverseTimeDecay):
+        elif isinstance(lr_scheduler, InverseTimeDecay):
             with paddle.static.program_guard(
                 decay_main_program, decay_startup_program
             ):
                 lr = inverse_time_decay(
-                    1.0, lr_decay_steps, lr_sheduler.gamma, True
+                    1.0, lr_decay_steps, lr_scheduler.gamma, True
                 )
                 lr_name = lr.name
                 logging.warn(
@@ -149,11 +149,11 @@ class AddLrDecayTablePass(PassBase):
     def _apply_single_impl(self, main_program, startup_program, pass_ctx):
         attrs = pass_ctx._attrs
-        if not hasattr(attrs['origin_main_program'], 'lr_sheduler'):
+        if not hasattr(attrs['origin_main_program'], 'lr_scheduler'):
             return
         assert isinstance(
-            attrs['origin_main_program'].lr_sheduler, LRScheduler
+            attrs['origin_main_program'].lr_scheduler, LRScheduler
         ), "must be LRScheduler"
         ops = get_optimize_ops(attrs['origin_main_program'])
@@ -161,8 +161,8 @@ class AddLrDecayTablePass(PassBase):
             lr_decay_main_program,
             lr_decay_startup_program,
             lr_name,
-        ) = self._get_lr_sheduler_program(
-            attrs['origin_main_program'].lr_sheduler, attrs['lr_decay_steps']
+        ) = self._get_lr_scheduler_program(
+            attrs['origin_main_program'].lr_scheduler, attrs['lr_decay_steps']
         )
         self._add_tensor_table(
             attrs,
diff --git a/python/paddle/distributed/passes/ps_trainer_pass.py b/python/paddle/distributed/passes/ps_trainer_pass.py
index 72cda1b85573ca53970777e8211894c7b887f3cc..a2db1219a9d6c1ec052b73152675f4a5e0bcb411 100755
--- a/python/paddle/distributed/passes/ps_trainer_pass.py
+++ b/python/paddle/distributed/passes/ps_trainer_pass.py
@@ -612,7 +612,7 @@ class DeleteOptimizesPass(PassBase):
             main_program, remote_optimize_ops, local_optimize_ops
         )
-        if hasattr(attrs['origin_main_program'], 'lr_sheduler'):
+        if hasattr(attrs['origin_main_program'], 'lr_scheduler'):
             self._add_lr_var(main_program, attrs)
diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py
index 4843b4e5692349ae1db68fd3a76367700157bbca..acdfa64769ce062eb70f1e498f69198ed12ea011 100644
--- a/python/paddle/fluid/compiler.py
+++ b/python/paddle/fluid/compiler.py
@@ -1235,15 +1235,15 @@ class IpuCompiledProgram:
         convert_pass.apply(self._graph)
         program = framework.Program._construct_from_desc(desc)
-        if hasattr(self._program, 'lr_sheduler'):
+        if hasattr(self._program, 'lr_scheduler'):
             # how to share var between two different block ?
-            lr_var_name = self._program.lr_sheduler._var_name
+            lr_var_name = self._program.lr_scheduler._var_name
-            program.lr_sheduler = self._program.lr_sheduler
-            # Program.clone will clone lr_sheduler, so i set lr_var as
-            # lr_sheduler attribute
+            program.lr_scheduler = self._program.lr_scheduler
+            # Program.clone will clone lr_scheduler, so i set lr_var as
+            # lr_scheduler attribute
             global_block = self._program.global_block()
-            program.lr_sheduler.lr_var = global_block.vars[lr_var_name]
+            program.lr_scheduler.lr_var = global_block.vars[lr_var_name]
         # with popart, we need to support batches_per_step, what means
         # the shape of feed_var and feed_tensor(maybe numpy array) will
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index 3b018f30c64938588bc3b8bc60529dbc19e8dccc..ce1c55bd1716882c2edd2090567e6a2102ac2b9f 100755
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -871,8 +871,8 @@ class _ExecutorCache:
             ir_graph = framework.IrGraph(compiled_program._graph)
             converted_program = ir_graph.to_program()
-            if hasattr(inner_program, 'lr_sheduler'):
-                converted_program.lr_sheduler = inner_program.lr_sheduler
+            if hasattr(inner_program, 'lr_scheduler'):
+                converted_program.lr_scheduler = inner_program.lr_scheduler
             inner_program = converted_program
             # print(f"Program after convert:\n {inner_program}", flush=True)
@@ -1657,17 +1657,17 @@ class Executor:
         )
         self._feed_data(program, feed, feed_var_name, scope)
-        if hasattr(program, 'lr_sheduler'):
+        if hasattr(program, 'lr_scheduler'):
             from paddle.optimizer.lr import LRScheduler
             assert isinstance(
-                program.lr_sheduler, LRScheduler
+                program.lr_scheduler, LRScheduler
             ), "must be LRScheduler"
-            lr_sheduler = program.lr_sheduler
-            lr_value = lr_sheduler()
-            lr_var = program.global_block().vars[lr_sheduler._var_name]
+            lr_scheduler = program.lr_scheduler
+            lr_value = lr_scheduler()
+            lr_var = program.global_block().vars[lr_scheduler._var_name]
             data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype))
-            tensor = core.get_variable_tensor(scope, lr_sheduler._var_name)
+            tensor = core.get_variable_tensor(scope, lr_scheduler._var_name)
             # NOTE(dev): `tensor.set(data, self.place)` always call TensorCopySync that is a blocking behavior. So we use `_copy_from` to replace it.
             cpu_tensor = _as_lodtensor(data, core.CPUPlace())
             if core.is_cuda_graph_capturing():
@@ -1810,15 +1810,15 @@ class Executor:
         )
         self._feed_data(program, feed, feed_var_name, scope)
-        if hasattr(program, 'lr_sheduler'):
+        if hasattr(program, 'lr_scheduler'):
             assert isinstance(
-                program.lr_sheduler, LRScheduler
+                program.lr_scheduler, LRScheduler
             ), "must be LRScheduler"
-            lr_sheduler = program.lr_sheduler
-            lr_value = lr_sheduler()
-            lr_var = program.global_block().vars[lr_sheduler._var_name]
+            lr_scheduler = program.lr_scheduler
+            lr_value = lr_scheduler()
+            lr_var = program.global_block().vars[lr_scheduler._var_name]
             data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype))
-            tensor = core.get_variable_tensor(scope, lr_sheduler._var_name)
+            tensor = core.get_variable_tensor(scope, lr_scheduler._var_name)
             tensor.set(data, self.place)
         if not use_program_cache:
@@ -2588,14 +2588,14 @@ class Executor:
         from paddle.optimizer.lr import LRScheduler
-        if hasattr(program, 'lr_sheduler'):
-            lr_sheduler = program.lr_sheduler
-            assert isinstance(lr_sheduler, LRScheduler), "must be LRScheduler"
-            lr_value = lr_sheduler()
-            lr_var = program.global_block().vars[lr_sheduler._var_name]
+        if hasattr(program, 'lr_scheduler'):
+            lr_scheduler = program.lr_scheduler
+            assert isinstance(lr_scheduler, LRScheduler), "must be LRScheduler"
+            lr_value = lr_scheduler()
+            lr_var = program.global_block().vars[lr_scheduler._var_name]
             data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype))
             tensor = core.get_variable_tensor(
-                cached_scope, lr_sheduler._var_name
+                cached_scope, lr_scheduler._var_name
             )
             tensor.set(data, self.place)
@@ -2732,13 +2732,13 @@ class Executor:
         from paddle.optimizer.lr import LRScheduler
-        if hasattr(program, 'lr_sheduler'):
-            lr_sheduler = program.lr_sheduler
-            assert isinstance(lr_sheduler, LRScheduler), "must be LRScheduler"
-            lr_value = lr_sheduler()
-            lr_var = program.global_block().vars[lr_sheduler._var_name]
+        if hasattr(program, 'lr_scheduler'):
+            lr_scheduler = program.lr_scheduler
+            assert isinstance(lr_scheduler, LRScheduler), "must be LRScheduler"
+            lr_value = lr_scheduler()
+            lr_var = program.global_block().vars[lr_scheduler._var_name]
             data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype))
-            tensor = core.get_variable_tensor(scope, lr_sheduler._var_name)
+            tensor = core.get_variable_tensor(scope, lr_scheduler._var_name)
             tensor.set(data, self.place)
         self._default_executor.run_from_dataset(trainer_instance)
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 43c23eb003e2ee2b86df2a61522315344e39a810..cb57fe793737547c07366249c9375672cf0dc1e4 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -6080,8 +6080,8 @@ class Program:
         p._current_role = self._current_role
         p.__op_role_var = self.__op_role_var
         p._appending_grad_times = self._appending_grad_times
-        if hasattr(self, 'lr_sheduler'):
-            p.lr_sheduler = self.lr_sheduler
+        if hasattr(self, 'lr_scheduler'):
+            p.lr_scheduler = self.lr_scheduler
         # NOTE(zhiqiu): we sync the cloned program, to update its program by
         # its desc.
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index ded8883ffdb533093c1c77162f0093af3a71662b..d3c3f72d63990dbe694037c095ebc389d72739be 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -389,7 +389,7 @@ class Optimizer:
                 dtype='float32' if self._dtype is None else self._dtype,
             )
             main_prog = framework.default_main_program()
-            main_prog.lr_sheduler = self._learning_rate
+            main_prog.lr_scheduler = self._learning_rate
             main_prog.lr_var = lr_var
             self._learning_rate_map[
                 framework.default_main_program()
diff --git a/python/paddle/fluid/tests/unittests/ipu/test_lr_sheduler_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_lr_sheduler_ipu.py
index 91c94ce48dda1ee8d6296c2d13f6b513797816f4..567b9ef84a5a1d72ea2bb21c24deed2de3164a6c 100644
--- a/python/paddle/fluid/tests/unittests/ipu/test_lr_sheduler_ipu.py
+++ b/python/paddle/fluid/tests/unittests/ipu/test_lr_sheduler_ipu.py
@@ -67,8 +67,8 @@ class TestConvNet(IPUOpTest):
         result = []
         for _ in range(100):
-            if hasattr(program, "lr_sheduler"):
-                program.lr_sheduler.step()
+            if hasattr(program, "lr_scheduler"):
+                program.lr_scheduler.step()
             loss_res = exe.run(
                 program, feed=self.feed, fetch_list=self.fetch_list
             )
diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py
index 54abe60f013f2fe6b0b013528d4bddeaaf762e8f..3e157b201f580d4d013a88d026d8557dfe0fa589 100755
--- a/python/paddle/fluid/tests/unittests/test_dist_base.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_base.py
@@ -132,13 +132,13 @@ class TestDistRunnerBase:
     @staticmethod
     def get_lr_scheduler(program):
-        lr_sheduler = None
-        if hasattr(program, 'lr_sheduler'):
+        lr_scheduler = None
+        if hasattr(program, 'lr_scheduler'):
             from paddle.optimizer.lr import LRScheduler
-            lr_sheduler = program.lr_sheduler
-            assert isinstance(lr_sheduler, LRScheduler), "must be LRScheduler"
-        return lr_sheduler
+            lr_scheduler = program.lr_scheduler
+            assert isinstance(lr_scheduler, LRScheduler), "must be LRScheduler"
+        return lr_scheduler
     def run_pserver(self, args):
         self.lr = args.lr
@@ -196,14 +196,14 @@ class TestDistRunnerBase:
         out_losses = []
         main_program = fluid.default_main_program()
-        lr_sheduler = self.get_lr_scheduler(main_program)
+        lr_scheduler = self.get_lr_scheduler(main_program)
         for i in range(RUN_STEP):
             loss = exe.run(main_program, fetch_list=[avg_cost])
             loss = loss[0] if loss else None
             out_losses.append(loss)
             print_to_err(type(self).__name__, "run step %d finished" % i)
-            if lr_sheduler is not None:
-                lr_sheduler.step()
+            if lr_scheduler is not None:
+                lr_scheduler.step()
         data_loader.reset()
         print_to_err(type(self).__name__, "trainer run finished")
diff --git a/python/paddle/fluid/tests/unittests/test_newprofiler.py b/python/paddle/fluid/tests/unittests/test_newprofiler.py
index 18c948a3658eca9608e05e015edc1d402c53de77..7f2009b1a884acd1012beddc9d924de5b75aa566 100755
--- a/python/paddle/fluid/tests/unittests/test_newprofiler.py
+++ b/python/paddle/fluid/tests/unittests/test_newprofiler.py
@@ -92,7 +92,7 @@ class TestProfiler(unittest.TestCase):
                 y = x / 2.0
                 prof.step()
-        def my_sheduler(num_step):
+        def my_scheduler(num_step):
             if num_step % 5 < 2:
                 return profiler.ProfilerState.RECORD_AND_RETURN
             elif num_step % 5 < 3:
@@ -102,7 +102,7 @@ class TestProfiler(unittest.TestCase):
             else:
                 return profiler.ProfilerState.CLOSED
-        def my_sheduler1(num_step):
+        def my_scheduler1(num_step):
             if num_step % 5 < 2:
                 return profiler.ProfilerState.RECORD
             elif num_step % 5 < 3:
@@ -124,7 +124,7 @@ class TestProfiler(unittest.TestCase):
         prof = None
         with profiler.Profiler(
             targets=[profiler.ProfilerTarget.CPU],
-            scheduler=my_sheduler,
+            scheduler=my_scheduler,
             on_trace_ready=my_trace_back,
         ) as prof:
             for i in range(5):
@@ -132,7 +132,7 @@ class TestProfiler(unittest.TestCase):
                 prof.step()
         prof = None
         with profiler.Profiler(
-            targets=[profiler.ProfilerTarget.CPU], scheduler=my_sheduler1
+            targets=[profiler.ProfilerTarget.CPU], scheduler=my_scheduler1
         ) as prof:
             for i in range(5):
                 y = x / 2.0
diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/ir/public.py b/python/paddle/incubate/distributed/fleet/parameter_server/ir/public.py
index 7d8e09d9b17cce99b405e9c631fd24599d8c2b22..16a93ab6b6fe3712e145e5c3e497b187e952ccd7 100755
--- a/python/paddle/incubate/distributed/fleet/parameter_server/ir/public.py
+++ b/python/paddle/incubate/distributed/fleet/parameter_server/ir/public.py
@@ -1362,11 +1362,11 @@ def _get_optimize_ops(_program):
 def _add_lr_decay_table_pass(main_program, compiled_config, lr_decay_steps):
-    if hasattr(compiled_config.origin_main_program, 'lr_sheduler'):
+    if hasattr(compiled_config.origin_main_program, 'lr_scheduler'):
         from paddle.optimizer.lr import LRScheduler
         assert isinstance(
-            compiled_config.origin_main_program.lr_sheduler, LRScheduler
+            compiled_config.origin_main_program.lr_scheduler, LRScheduler
         ), "must be LRScheduler"
         ops = _get_optimize_ops(compiled_config.origin_main_program)
         lr_param_dict = _get_lr_param_dict(ops)
@@ -1374,8 +1374,8 @@ def _add_lr_decay_table_pass(main_program, compiled_config, lr_decay_steps):
             lr_decay_main_program,
             lr_decay_startup_program,
             lr_name,
-        ) = _get_lr_sheduler_program(
-            compiled_config.origin_main_program.lr_sheduler,
+        ) = _get_lr_scheduler_program(
+            compiled_config.origin_main_program.lr_scheduler,
             lr_param_dict,
             lr_decay_steps,
         )
@@ -1399,7 +1399,7 @@ def _get_lr_param_dict(opt_ops):
     return lr_param_dict
-def _get_lr_sheduler_program(lr_sheduler, lr_param_dict, lr_decay_steps):
+def _get_lr_scheduler_program(lr_scheduler, lr_param_dict, lr_decay_steps):
     schedler_decay = [
         'NoamDecay',
         'NaturalExpDecay',
@@ -1424,11 +1424,13 @@
     decay_startup_program = paddle.static.Program()
     lr_name = ""
-    if isinstance(lr_sheduler, ExponentialDecay):
+    if isinstance(lr_scheduler, ExponentialDecay):
         with paddle.static.program_guard(
             decay_main_program, decay_startup_program
         ):
-            lr = exponential_decay(1.0, lr_decay_steps, lr_sheduler.gamma, True)
+            lr = exponential_decay(
+                1.0, lr_decay_steps, lr_scheduler.gamma, True
+            )
             lr_name = lr.name
             logging.warn(
                 "ExponentialDecay is set, staircase = True, global learning rate decay step is [ %d ], Change decay steps as follow: \n"
@@ -1437,21 +1439,25 @@
                 "\t strategy.a_sync_configs= { 'lr_decay_steps' : YOUR_DECAY_STEP } \n"
                 % lr_decay_steps
             )
-    elif isinstance(lr_sheduler, NoamDecay):
+    elif isinstance(lr_scheduler, NoamDecay):
         with paddle.static.program_guard(
             decay_main_program, decay_startup_program
         ):
-            lr = noam_decay(lr_sheduler.d_model, lr_sheduler.warmup_steps, 1.0)
+            lr = noam_decay(
+                lr_scheduler.d_model, lr_scheduler.warmup_steps, 1.0
+            )
             lr_name = lr.name
             logging.warn(
                 "NoamDecay is set, warmup steps is [ %d ]"
-                % lr_sheduler.warmup_steps
+                % lr_scheduler.warmup_steps
             )
-    elif isinstance(lr_sheduler, NaturalExpDecay):
+    elif isinstance(lr_scheduler, NaturalExpDecay):
         with paddle.static.program_guard(
             decay_main_program, decay_startup_program
         ):
-            lr = natural_exp_decay(1.0, lr_decay_steps, lr_sheduler.gamma, True)
+            lr = natural_exp_decay(
+                1.0, lr_decay_steps, lr_scheduler.gamma, True
+            )
             lr_name = lr.name
             logging.warn(
                 "NaturalExpDecay is set, staircase = True, global learning rate decay step is [ %d ], Change decay steps as follow: \n"
@@ -1460,12 +1466,12 @@
                 "\t strategy.a_sync_configs= { 'lr_decay_steps' : YOUR_DECAY_STEP } \n"
                 % lr_decay_steps
             )
-    elif isinstance(lr_sheduler, InverseTimeDecay):
+    elif isinstance(lr_scheduler, InverseTimeDecay):
         with paddle.static.program_guard(
             decay_main_program, decay_startup_program
         ):
             lr = inverse_time_decay(
-                1.0, lr_decay_steps, lr_sheduler.gamma, True
+                1.0, lr_decay_steps, lr_scheduler.gamma, True
             )
             lr_name = lr.name
             logging.warn(
diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/ir/trainer_pass.py b/python/paddle/incubate/distributed/fleet/parameter_server/ir/trainer_pass.py
index 67d32480761c47541dcd1305efe04aa711c5d7c9..3d5ec4c1bf76e8f8b470078f971f7691a50024dd 100644
--- a/python/paddle/incubate/distributed/fleet/parameter_server/ir/trainer_pass.py
+++ b/python/paddle/incubate/distributed/fleet/parameter_server/ir/trainer_pass.py
@@ -93,7 +93,7 @@ def delete_optimizer_pass(program, config):
         optimizer_ops.extend(lr_ops)
         _delete_optimizer_op_and_vars(program, optimizer_ops)
-    if hasattr(config.origin_main_program, 'lr_sheduler'):
+    if hasattr(config.origin_main_program, 'lr_scheduler'):
         _add_lr_var(program, config)
     return program
diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py
index f46c3a65cfeb34ed333b2acdf637533f532c0c22..e2abec10c08235396735aab4cb7502eeaf00b095 100644
--- a/python/paddle/jit/dy2static/partial_program.py
+++ b/python/paddle/jit/dy2static/partial_program.py
@@ -1127,8 +1127,10 @@ def add_build_strategy_for(
         )
         ir_graph = framework.IrGraph(compiled_program._graph)
         builded_program = ir_graph.to_program()
-        if hasattr(compiled_program._program, 'lr_sheduler'):
-            builded_program.lr_sheduler = compiled_program._program.lr_sheduler
+        if hasattr(compiled_program._program, 'lr_scheduler'):
+            builded_program.lr_scheduler = (
+                compiled_program._program.lr_scheduler
+            )
     else:
         # can't just create a new program, we need copy the vardesc.
         builded_program = paddle.static.Program()
diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py
index 72924ed692fceaeb26f61bfc38279d5291446d49..d7bfc0b22963205fa201efa63529b97128328ab0 100644
--- a/python/paddle/optimizer/optimizer.py
+++ b/python/paddle/optimizer/optimizer.py
@@ -444,7 +444,7 @@ class Optimizer:
                 dtype=_lr_dtype,
            )
             main_prog = framework.default_main_program()
-            main_prog.lr_sheduler = self._learning_rate
+            main_prog.lr_scheduler = self._learning_rate
             main_prog.lr_var = lr_var
             self._learning_rate_map[
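The rename is mechanical, but `program.lr_scheduler` is part of the implicit contract the patch touches: the optimizer attaches the scheduler to the main program, `Program.clone` carries it over, and `Executor.run` calls it on every step to refresh the learning-rate variable. Below is a minimal static-graph sketch of that contract after this patch; it is an illustration only, not part of the diff, and the toy network and random data are invented for the example.

import numpy as np
import paddle

paddle.enable_static()

main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[None, 4], dtype='float32')
    y = paddle.static.data(name='y', shape=[None, 1], dtype='float32')
    pred = paddle.static.nn.fc(x, size=1)
    loss = paddle.mean(paddle.nn.functional.square_error_cost(pred, y))

    scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.1, gamma=0.9)
    paddle.optimizer.SGD(learning_rate=scheduler).minimize(loss)

# minimize() attached the scheduler to the program; after this patch the
# attribute is spelled `lr_scheduler` everywhere.
assert isinstance(main_prog.lr_scheduler, paddle.optimizer.lr.LRScheduler)

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)
for _ in range(3):
    exe.run(
        main_prog,
        feed={
            'x': np.random.rand(8, 4).astype('float32'),
            'y': np.random.rand(8, 1).astype('float32'),
        },
        fetch_list=[loss],
    )
    # Executor.run() reads program.lr_scheduler() to fill the LR variable,
    # so the caller advances the schedule between iterations.
    scheduler.step()

Stepping the scheduler between `exe.run` calls mirrors what the IPU and distributed tests in this patch do after the rename.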