Unverified commit e492ee24, authored by Nyakku Shigure, committed by GitHub

fix a typo, `sheduler` -> `scheduler` (#52149)

Parent e57051b4
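The commit is a pure spelling fix with no behavior change. For context, the sketch below is not part of the diff; it is a minimal example, assuming Paddle 2.x static-graph APIs, of where the renamed `lr_scheduler` attribute comes from and who consumes it: the Optimizer attaches the LRScheduler instance to the main Program when it builds the global learning-rate variable, and Executor.run reads that attribute to copy the scheduler's current value into the LR tensor before each step. The toy network, feed names, and decay parameters are illustrative only; on builds without this fix the attribute is still spelled `lr_sheduler`.

import numpy as np
import paddle

paddle.enable_static()

main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[None, 4], dtype='float32')
    loss = paddle.mean(paddle.static.nn.fc(x, size=1))

    # Handing an LRScheduler to the optimizer is what (indirectly) attaches it
    # to the program: Optimizer._create_global_learning_rate() sets
    # `main_prog.lr_scheduler = self._learning_rate` -- the attribute this
    # commit renames from the misspelled `lr_sheduler`.
    scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.1, gamma=0.5)
    paddle.optimizer.SGD(learning_rate=scheduler).minimize(loss)

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)

# Executor.run checks for this attribute, evaluates the scheduler, and writes
# the current learning rate into the LR variable's tensor before every step.
print(hasattr(main_prog, 'lr_scheduler'))  # True on a build containing this fix

for _ in range(3):
    exe.run(main_prog,
            feed={'x': np.random.rand(2, 4).astype('float32')},
            fetch_list=[loss])
    scheduler.step()  # advance the decay schedule between iterations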
@@ -69,7 +69,7 @@ class AddLrDecayTablePass(PassBase):
         ] = tensor_table_class
         attrs['tensor_table'] = tensor_table_dict

-    def _get_lr_sheduler_program(self, lr_sheduler, lr_decay_steps):
+    def _get_lr_scheduler_program(self, lr_scheduler, lr_decay_steps):
         schedler_decay = [
             'NoamDecay',
             'NaturalExpDecay',
@@ -81,12 +81,12 @@ class AddLrDecayTablePass(PassBase):
         decay_startup_program = paddle.static.Program()
         lr_name = ""
-        if isinstance(lr_sheduler, ExponentialDecay):
+        if isinstance(lr_scheduler, ExponentialDecay):
             with paddle.static.program_guard(
                 decay_main_program, decay_startup_program
             ):
                 lr = exponential_decay(
-                    1.0, lr_decay_steps, lr_sheduler.gamma, True
+                    1.0, lr_decay_steps, lr_scheduler.gamma, True
                 )
                 lr_name = lr.name
                 logging.warn(
@@ -96,24 +96,24 @@ class AddLrDecayTablePass(PassBase):
                     "\t strategy.a_sync_configs= { 'lr_decay_steps' : YOUR_DECAY_STEP } \n"
                     % lr_decay_steps
                 )
-        elif isinstance(lr_sheduler, NoamDecay):
+        elif isinstance(lr_scheduler, NoamDecay):
             with paddle.static.program_guard(
                 decay_main_program, decay_startup_program
             ):
                 lr = noam_decay(
-                    lr_sheduler.d_model, lr_sheduler.warmup_steps, 1.0
+                    lr_scheduler.d_model, lr_scheduler.warmup_steps, 1.0
                 )
                 lr_name = lr.name
                 logging.warn(
                     "NoamDecay is set, warmup steps is [ %d ]"
-                    % lr_sheduler.warmup_steps
+                    % lr_scheduler.warmup_steps
                 )
-        elif isinstance(lr_sheduler, NaturalExpDecay):
+        elif isinstance(lr_scheduler, NaturalExpDecay):
             with paddle.static.program_guard(
                 decay_main_program, decay_startup_program
             ):
                 lr = natural_exp_decay(
-                    1.0, lr_decay_steps, lr_sheduler.gamma, True
+                    1.0, lr_decay_steps, lr_scheduler.gamma, True
                 )
                 lr_name = lr.name
                 logging.warn(
@@ -123,12 +123,12 @@ class AddLrDecayTablePass(PassBase):
                     "\t strategy.a_sync_configs= { 'lr_decay_steps' : YOUR_DECAY_STEP } \n"
                     % lr_decay_steps
                 )
-        elif isinstance(lr_sheduler, InverseTimeDecay):
+        elif isinstance(lr_scheduler, InverseTimeDecay):
             with paddle.static.program_guard(
                 decay_main_program, decay_startup_program
             ):
                 lr = inverse_time_decay(
-                    1.0, lr_decay_steps, lr_sheduler.gamma, True
+                    1.0, lr_decay_steps, lr_scheduler.gamma, True
                 )
                 lr_name = lr.name
                 logging.warn(
@@ -149,11 +149,11 @@ class AddLrDecayTablePass(PassBase):
     def _apply_single_impl(self, main_program, startup_program, pass_ctx):
         attrs = pass_ctx._attrs
-        if not hasattr(attrs['origin_main_program'], 'lr_sheduler'):
+        if not hasattr(attrs['origin_main_program'], 'lr_scheduler'):
             return

         assert isinstance(
-            attrs['origin_main_program'].lr_sheduler, LRScheduler
+            attrs['origin_main_program'].lr_scheduler, LRScheduler
         ), "must be LRScheduler"

         ops = get_optimize_ops(attrs['origin_main_program'])
@@ -161,8 +161,8 @@ class AddLrDecayTablePass(PassBase):
             lr_decay_main_program,
             lr_decay_startup_program,
             lr_name,
-        ) = self._get_lr_sheduler_program(
-            attrs['origin_main_program'].lr_sheduler, attrs['lr_decay_steps']
+        ) = self._get_lr_scheduler_program(
+            attrs['origin_main_program'].lr_scheduler, attrs['lr_decay_steps']
         )
         self._add_tensor_table(
             attrs,
......
@@ -612,7 +612,7 @@ class DeleteOptimizesPass(PassBase):
             main_program, remote_optimize_ops, local_optimize_ops
         )

-        if hasattr(attrs['origin_main_program'], 'lr_sheduler'):
+        if hasattr(attrs['origin_main_program'], 'lr_scheduler'):
             self._add_lr_var(main_program, attrs)
......
@@ -1235,15 +1235,15 @@ class IpuCompiledProgram:
         convert_pass.apply(self._graph)
         program = framework.Program._construct_from_desc(desc)

-        if hasattr(self._program, 'lr_sheduler'):
+        if hasattr(self._program, 'lr_scheduler'):
             # how to share var between two different block ?
-            lr_var_name = self._program.lr_sheduler._var_name
-            program.lr_sheduler = self._program.lr_sheduler
-            # Program.clone will clone lr_sheduler, so i set lr_var as
-            # lr_sheduler attribute
+            lr_var_name = self._program.lr_scheduler._var_name
+            program.lr_scheduler = self._program.lr_scheduler
+            # Program.clone will clone lr_scheduler, so i set lr_var as
+            # lr_scheduler attribute
             global_block = self._program.global_block()
-            program.lr_sheduler.lr_var = global_block.vars[lr_var_name]
+            program.lr_scheduler.lr_var = global_block.vars[lr_var_name]

         # with popart, we need to support batches_per_step, what means
         # the shape of feed_var and feed_tensor(maybe numpy array) will
......
@@ -871,8 +871,8 @@ class _ExecutorCache:
             ir_graph = framework.IrGraph(compiled_program._graph)
             converted_program = ir_graph.to_program()

-            if hasattr(inner_program, 'lr_sheduler'):
-                converted_program.lr_sheduler = inner_program.lr_sheduler
+            if hasattr(inner_program, 'lr_scheduler'):
+                converted_program.lr_scheduler = inner_program.lr_scheduler

             inner_program = converted_program
             # print(f"Program after convert:\n {inner_program}", flush=True)
@@ -1657,17 +1657,17 @@ class Executor:
             )
             self._feed_data(program, feed, feed_var_name, scope)

-            if hasattr(program, 'lr_sheduler'):
+            if hasattr(program, 'lr_scheduler'):
                 from paddle.optimizer.lr import LRScheduler

                 assert isinstance(
-                    program.lr_sheduler, LRScheduler
+                    program.lr_scheduler, LRScheduler
                 ), "must be LRScheduler"
-                lr_sheduler = program.lr_sheduler
-                lr_value = lr_sheduler()
-                lr_var = program.global_block().vars[lr_sheduler._var_name]
+                lr_scheduler = program.lr_scheduler
+                lr_value = lr_scheduler()
+                lr_var = program.global_block().vars[lr_scheduler._var_name]
                 data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype))
-                tensor = core.get_variable_tensor(scope, lr_sheduler._var_name)
+                tensor = core.get_variable_tensor(scope, lr_scheduler._var_name)
                 # NOTE(dev): `tensor.set(data, self.place)` always call TensorCopySync that is a blocking behavior. So we use `_copy_from` to replace it.
                 cpu_tensor = _as_lodtensor(data, core.CPUPlace())
                 if core.is_cuda_graph_capturing():
@@ -1810,15 +1810,15 @@ class Executor:
         )
         self._feed_data(program, feed, feed_var_name, scope)

-        if hasattr(program, 'lr_sheduler'):
+        if hasattr(program, 'lr_scheduler'):
             assert isinstance(
-                program.lr_sheduler, LRScheduler
+                program.lr_scheduler, LRScheduler
             ), "must be LRScheduler"
-            lr_sheduler = program.lr_sheduler
-            lr_value = lr_sheduler()
-            lr_var = program.global_block().vars[lr_sheduler._var_name]
+            lr_scheduler = program.lr_scheduler
+            lr_value = lr_scheduler()
+            lr_var = program.global_block().vars[lr_scheduler._var_name]
             data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype))
-            tensor = core.get_variable_tensor(scope, lr_sheduler._var_name)
+            tensor = core.get_variable_tensor(scope, lr_scheduler._var_name)
             tensor.set(data, self.place)

         if not use_program_cache:
@@ -2588,14 +2588,14 @@ class Executor:
         from paddle.optimizer.lr import LRScheduler

-        if hasattr(program, 'lr_sheduler'):
-            lr_sheduler = program.lr_sheduler
-            assert isinstance(lr_sheduler, LRScheduler), "must be LRScheduler"
-            lr_value = lr_sheduler()
-            lr_var = program.global_block().vars[lr_sheduler._var_name]
+        if hasattr(program, 'lr_scheduler'):
+            lr_scheduler = program.lr_scheduler
+            assert isinstance(lr_scheduler, LRScheduler), "must be LRScheduler"
+            lr_value = lr_scheduler()
+            lr_var = program.global_block().vars[lr_scheduler._var_name]
             data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype))
             tensor = core.get_variable_tensor(
-                cached_scope, lr_sheduler._var_name
+                cached_scope, lr_scheduler._var_name
             )
             tensor.set(data, self.place)
@@ -2732,13 +2732,13 @@ class Executor:
         from paddle.optimizer.lr import LRScheduler

-        if hasattr(program, 'lr_sheduler'):
-            lr_sheduler = program.lr_sheduler
-            assert isinstance(lr_sheduler, LRScheduler), "must be LRScheduler"
-            lr_value = lr_sheduler()
-            lr_var = program.global_block().vars[lr_sheduler._var_name]
+        if hasattr(program, 'lr_scheduler'):
+            lr_scheduler = program.lr_scheduler
+            assert isinstance(lr_scheduler, LRScheduler), "must be LRScheduler"
+            lr_value = lr_scheduler()
+            lr_var = program.global_block().vars[lr_scheduler._var_name]
             data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype))
-            tensor = core.get_variable_tensor(scope, lr_sheduler._var_name)
+            tensor = core.get_variable_tensor(scope, lr_scheduler._var_name)
             tensor.set(data, self.place)

         self._default_executor.run_from_dataset(trainer_instance)
......
@@ -6080,8 +6080,8 @@ class Program:
         p._current_role = self._current_role
         p.__op_role_var = self.__op_role_var
         p._appending_grad_times = self._appending_grad_times
-        if hasattr(self, 'lr_sheduler'):
-            p.lr_sheduler = self.lr_sheduler
+        if hasattr(self, 'lr_scheduler'):
+            p.lr_scheduler = self.lr_scheduler

         # NOTE(zhiqiu): we sync the cloned program, to update its program by
         # its desc.
......
@@ -389,7 +389,7 @@ class Optimizer:
                     dtype='float32' if self._dtype is None else self._dtype,
                 )
                 main_prog = framework.default_main_program()
-                main_prog.lr_sheduler = self._learning_rate
+                main_prog.lr_scheduler = self._learning_rate
                 main_prog.lr_var = lr_var
             self._learning_rate_map[
                 framework.default_main_program()
......
@@ -67,8 +67,8 @@ class TestConvNet(IPUOpTest):
         result = []
         for _ in range(100):
-            if hasattr(program, "lr_sheduler"):
-                program.lr_sheduler.step()
+            if hasattr(program, "lr_scheduler"):
+                program.lr_scheduler.step()
             loss_res = exe.run(
                 program, feed=self.feed, fetch_list=self.fetch_list
             )
......
@@ -132,13 +132,13 @@ class TestDistRunnerBase:
     @staticmethod
     def get_lr_scheduler(program):
-        lr_sheduler = None
-        if hasattr(program, 'lr_sheduler'):
+        lr_scheduler = None
+        if hasattr(program, 'lr_scheduler'):
             from paddle.optimizer.lr import LRScheduler

-            lr_sheduler = program.lr_sheduler
-            assert isinstance(lr_sheduler, LRScheduler), "must be LRScheduler"
-        return lr_sheduler
+            lr_scheduler = program.lr_scheduler
+            assert isinstance(lr_scheduler, LRScheduler), "must be LRScheduler"
+        return lr_scheduler

     def run_pserver(self, args):
         self.lr = args.lr
@@ -196,14 +196,14 @@ class TestDistRunnerBase:
         out_losses = []
         main_program = fluid.default_main_program()
-        lr_sheduler = self.get_lr_scheduler(main_program)
+        lr_scheduler = self.get_lr_scheduler(main_program)
         for i in range(RUN_STEP):
             loss = exe.run(main_program, fetch_list=[avg_cost])
             loss = loss[0] if loss else None
             out_losses.append(loss)
             print_to_err(type(self).__name__, "run step %d finished" % i)
-            if lr_sheduler is not None:
-                lr_sheduler.step()
+            if lr_scheduler is not None:
+                lr_scheduler.step()

         data_loader.reset()
         print_to_err(type(self).__name__, "trainer run finished")
......
@@ -92,7 +92,7 @@ class TestProfiler(unittest.TestCase):
                 y = x / 2.0
                 prof.step()

-        def my_sheduler(num_step):
+        def my_scheduler(num_step):
             if num_step % 5 < 2:
                 return profiler.ProfilerState.RECORD_AND_RETURN
             elif num_step % 5 < 3:
@@ -102,7 +102,7 @@ class TestProfiler(unittest.TestCase):
             else:
                 return profiler.ProfilerState.CLOSED

-        def my_sheduler1(num_step):
+        def my_scheduler1(num_step):
             if num_step % 5 < 2:
                 return profiler.ProfilerState.RECORD
             elif num_step % 5 < 3:
@@ -124,7 +124,7 @@ class TestProfiler(unittest.TestCase):
         prof = None
         with profiler.Profiler(
             targets=[profiler.ProfilerTarget.CPU],
-            scheduler=my_sheduler,
+            scheduler=my_scheduler,
             on_trace_ready=my_trace_back,
         ) as prof:
             for i in range(5):
@@ -132,7 +132,7 @@ class TestProfiler(unittest.TestCase):
                 prof.step()
         prof = None
         with profiler.Profiler(
-            targets=[profiler.ProfilerTarget.CPU], scheduler=my_sheduler1
+            targets=[profiler.ProfilerTarget.CPU], scheduler=my_scheduler1
         ) as prof:
             for i in range(5):
                 y = x / 2.0
......
@@ -1362,11 +1362,11 @@ def _get_optimize_ops(_program):
 def _add_lr_decay_table_pass(main_program, compiled_config, lr_decay_steps):
-    if hasattr(compiled_config.origin_main_program, 'lr_sheduler'):
+    if hasattr(compiled_config.origin_main_program, 'lr_scheduler'):
         from paddle.optimizer.lr import LRScheduler

         assert isinstance(
-            compiled_config.origin_main_program.lr_sheduler, LRScheduler
+            compiled_config.origin_main_program.lr_scheduler, LRScheduler
         ), "must be LRScheduler"
         ops = _get_optimize_ops(compiled_config.origin_main_program)
         lr_param_dict = _get_lr_param_dict(ops)
@@ -1374,8 +1374,8 @@ def _add_lr_decay_table_pass(main_program, compiled_config, lr_decay_steps):
             lr_decay_main_program,
             lr_decay_startup_program,
             lr_name,
-        ) = _get_lr_sheduler_program(
-            compiled_config.origin_main_program.lr_sheduler,
+        ) = _get_lr_scheduler_program(
+            compiled_config.origin_main_program.lr_scheduler,
             lr_param_dict,
             lr_decay_steps,
         )
@@ -1399,7 +1399,7 @@ def _get_lr_param_dict(opt_ops):
     return lr_param_dict


-def _get_lr_sheduler_program(lr_sheduler, lr_param_dict, lr_decay_steps):
+def _get_lr_scheduler_program(lr_scheduler, lr_param_dict, lr_decay_steps):
     schedler_decay = [
         'NoamDecay',
         'NaturalExpDecay',
@@ -1424,11 +1424,13 @@ def _get_lr_sheduler_program(lr_sheduler, lr_param_dict, lr_decay_steps):
     decay_startup_program = paddle.static.Program()
     lr_name = ""
-    if isinstance(lr_sheduler, ExponentialDecay):
+    if isinstance(lr_scheduler, ExponentialDecay):
         with paddle.static.program_guard(
             decay_main_program, decay_startup_program
         ):
-            lr = exponential_decay(1.0, lr_decay_steps, lr_sheduler.gamma, True)
+            lr = exponential_decay(
+                1.0, lr_decay_steps, lr_scheduler.gamma, True
+            )
             lr_name = lr.name
             logging.warn(
                 "ExponentialDecay is set, staircase = True, global learning rate decay step is [ %d ], Change decay steps as follow: \n"
@@ -1437,21 +1439,25 @@ def _get_lr_sheduler_program(lr_sheduler, lr_param_dict, lr_decay_steps):
             "\t strategy.a_sync_configs= { 'lr_decay_steps' : YOUR_DECAY_STEP } \n"
             % lr_decay_steps
         )
-    elif isinstance(lr_sheduler, NoamDecay):
+    elif isinstance(lr_scheduler, NoamDecay):
         with paddle.static.program_guard(
             decay_main_program, decay_startup_program
         ):
-            lr = noam_decay(lr_sheduler.d_model, lr_sheduler.warmup_steps, 1.0)
+            lr = noam_decay(
+                lr_scheduler.d_model, lr_scheduler.warmup_steps, 1.0
+            )
             lr_name = lr.name
             logging.warn(
                 "NoamDecay is set, warmup steps is [ %d ]"
-                % lr_sheduler.warmup_steps
+                % lr_scheduler.warmup_steps
             )
-    elif isinstance(lr_sheduler, NaturalExpDecay):
+    elif isinstance(lr_scheduler, NaturalExpDecay):
         with paddle.static.program_guard(
             decay_main_program, decay_startup_program
         ):
-            lr = natural_exp_decay(1.0, lr_decay_steps, lr_sheduler.gamma, True)
+            lr = natural_exp_decay(
+                1.0, lr_decay_steps, lr_scheduler.gamma, True
+            )
             lr_name = lr.name
             logging.warn(
                 "NaturalExpDecay is set, staircase = True, global learning rate decay step is [ %d ], Change decay steps as follow: \n"
@@ -1460,12 +1466,12 @@ def _get_lr_sheduler_program(lr_sheduler, lr_param_dict, lr_decay_steps):
             "\t strategy.a_sync_configs= { 'lr_decay_steps' : YOUR_DECAY_STEP } \n"
             % lr_decay_steps
         )
-    elif isinstance(lr_sheduler, InverseTimeDecay):
+    elif isinstance(lr_scheduler, InverseTimeDecay):
         with paddle.static.program_guard(
             decay_main_program, decay_startup_program
         ):
             lr = inverse_time_decay(
-                1.0, lr_decay_steps, lr_sheduler.gamma, True
+                1.0, lr_decay_steps, lr_scheduler.gamma, True
             )
             lr_name = lr.name
             logging.warn(
......
@@ -93,7 +93,7 @@ def delete_optimizer_pass(program, config):
     optimizer_ops.extend(lr_ops)
     _delete_optimizer_op_and_vars(program, optimizer_ops)

-    if hasattr(config.origin_main_program, 'lr_sheduler'):
+    if hasattr(config.origin_main_program, 'lr_scheduler'):
         _add_lr_var(program, config)
     return program
......
@@ -1127,8 +1127,10 @@ def add_build_strategy_for(
         )
         ir_graph = framework.IrGraph(compiled_program._graph)
         builded_program = ir_graph.to_program()
-        if hasattr(compiled_program._program, 'lr_sheduler'):
-            builded_program.lr_sheduler = compiled_program._program.lr_sheduler
+        if hasattr(compiled_program._program, 'lr_scheduler'):
+            builded_program.lr_scheduler = (
+                compiled_program._program.lr_scheduler
+            )
     else:
         # can't just create a new program, we need copy the vardesc.
         builded_program = paddle.static.Program()
......
@@ -444,7 +444,7 @@ class Optimizer:
                     dtype=_lr_dtype,
                 )
                 main_prog = framework.default_main_program()
-                main_prog.lr_sheduler = self._learning_rate
+                main_prog.lr_scheduler = self._learning_rate
                 main_prog.lr_var = lr_var

             self._learning_rate_map[
......