diff --git a/python/paddle/fluid/contrib/slim/distillation/distillation_strategy.py b/python/paddle/fluid/contrib/slim/distillation/distillation_strategy.py
index 1f11f07a51e713d42cee5e63bd2a9a02d82232f7..2fc6b45183164f135ae3ced08c1900ad526add45 100644
--- a/python/paddle/fluid/contrib/slim/distillation/distillation_strategy.py
+++ b/python/paddle/fluid/contrib/slim/distillation/distillation_strategy.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from ..core.strategy import Strategy
-from ....framework import Program, program_guard
+from ....framework import Program, Variable, program_guard
 from .... import Executor
 import logging
 
@@ -74,8 +74,17 @@ class DistillationStrategy(Strategy):
         startup_program = Program()
         with program_guard(graph.program, startup_program):
             context.distiller_optimizer._name = 'distillation_optimizer'
-            context.distiller_optimizer.minimize(
-                graph.var(graph.out_nodes['loss'])._var)
+
+            # The learning rate variable may be created in other program.
+            # Update information in optimizer to make
+            # learning rate variable being accessible in current program.
+            optimizer = context.distiller_optimizer
+            if isinstance(optimizer._learning_rate, Variable):
+                optimizer._learning_rate_map[
+                    graph.program] = optimizer._learning_rate
+
+            optimizer.minimize(graph.var(graph.out_nodes['loss'])._var)
+
         exe = Executor(context.place)
         exe.run(startup_program, scope=context.scope)
 
diff --git a/python/paddle/fluid/contrib/slim/graph/graph_wrapper.py b/python/paddle/fluid/contrib/slim/graph/graph_wrapper.py
index c208553fd811c7b18f9168b8fcae4da6e5856070..7388ecd3b096fc05d1420b904f2d65d805c3fc53 100644
--- a/python/paddle/fluid/contrib/slim/graph/graph_wrapper.py
+++ b/python/paddle/fluid/contrib/slim/graph/graph_wrapper.py
@@ -402,6 +402,12 @@ class GraphWrapper(object):
             elif 'cost' in graph.out_nodes:
                 target_name = graph.out_nodes['cost']
             target = graph.var(target_name)._var
+            # The learning rate variable may be created in other program.
+            # Update information in optimizer to make
+            # learning rate variable being accessible in current program.
+            if isinstance(optimizer._learning_rate, Variable):
+                optimizer._learning_rate_map[
+                    graph.program] = optimizer._learning_rate
             optimizer.minimize(target, no_grad_set=no_grad_var_names)
 
         exe = Executor(place)
diff --git a/python/paddle/fluid/contrib/slim/tests/test_distillation_strategy.py b/python/paddle/fluid/contrib/slim/tests/test_distillation_strategy.py
index 9b967c0ac7d2bfdab23d4557ef0b7d28f4118ff7..094cc4c6ac8be582fc31d0436e4468d2ebbb235a 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_distillation_strategy.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_distillation_strategy.py
@@ -41,9 +41,11 @@ class TestDistillationStrategy(unittest.TestCase):
 
         cost = fluid.layers.cross_entropy(input=out, label=label)
         avg_cost = fluid.layers.mean(x=cost)
+
         optimizer = fluid.optimizer.Momentum(
             momentum=0.9,
-            learning_rate=0.01,
+            learning_rate=fluid.layers.piecewise_decay(
+                boundaries=[5, 10], values=[0.01, 0.001, 0.0001]),
             regularization=fluid.regularizer.L2Decay(4e-5))
         place = fluid.CUDAPlace(0)
 
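Note (not part of the diff): the sketch below illustrates why the `_learning_rate_map` update is needed. It assumes the PaddlePaddle 1.x `fluid` API; the program and variable names (`main`, `cloned`, `pred`, and so on) are made up for illustration. When the learning rate is a `Variable` created by `fluid.layers.piecewise_decay` in one program, calling `Optimizer.minimize` under a `program_guard` for a different (for example, cloned) program cannot locate that variable unless the program is registered in `optimizer._learning_rate_map`, which is what the patch does for the distillation graph.

```python
# Minimal sketch, assuming the PaddlePaddle 1.x fluid API.
import paddle.fluid as fluid
from paddle.fluid.framework import Variable

main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(
        fluid.layers.square_error_cost(input=pred, label=y))
    # The decayed learning rate is a Variable created in `main`.
    optimizer = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=fluid.layers.piecewise_decay(
            boundaries=[5, 10], values=[0.01, 0.001, 0.0001]))

# Clone the program, roughly what GraphWrapper.clone() does; the learning
# rate Variable held by the optimizer still belongs to `main`.
cloned = main.clone()
with fluid.program_guard(cloned, startup):
    # Without this mapping, minimize() cannot find a learning rate
    # Variable for `cloned`; registering the existing Variable makes it
    # accessible in the current program, mirroring the patch above.
    if isinstance(optimizer._learning_rate, Variable):
        optimizer._learning_rate_map[cloned] = optimizer._learning_rate
    optimizer.minimize(cloned.global_block().var(loss.name))
```

The point of the sketch is construction time: with the mapping in place, `minimize()` appends its optimize ops to the cloned program instead of failing to resolve the learning rate, which is the situation `DistillationStrategy` and `GraphWrapper.get_optimize_graph` hit with the piecewise-decayed learning rate added in the test.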