diff --git a/demo/optimizer.py b/demo/optimizer.py
index 0f0c57985f839097e9e1ae4643ba2e5a2fb64698..6b8962749b6f5000fadc67356dbb302b57d4c3e7 100644
--- a/demo/optimizer.py
+++ b/demo/optimizer.py
@@ -20,7 +20,6 @@ import math
 
 import paddle.fluid as fluid
 import paddle.fluid.layers.ops as ops
-from paddle.fluid.initializer import init_on_cpu
 from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
 
 lr_strategy = 'cosine_decay'
@@ -40,10 +39,9 @@ def cosine_decay(learning_rate, step_each_epoch, epochs=120):
     """
     global_step = _decay_step_counter()
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        decayed_lr = learning_rate * \
-            (ops.cos(epoch * (math.pi / epochs)) + 1)/2
+    epoch = ops.floor(global_step / step_each_epoch)
+    decayed_lr = learning_rate * \
+        (ops.cos(epoch * (math.pi / epochs)) + 1)/2
     return decayed_lr
 
 
@@ -63,17 +61,16 @@ def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
     warmup_epoch = fluid.layers.fill_constant(
         shape=[1], dtype='float32', value=float(5), force_cpu=True)
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < warmup_epoch):
-                decayed_lr = learning_rate * (global_step /
-                                              (step_each_epoch * warmup_epoch))
-                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
-            with switch.default():
-                decayed_lr = learning_rate * \
-                    (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
-                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+    epoch = ops.floor(global_step / step_each_epoch)
+    with fluid.layers.control_flow.Switch() as switch:
+        with switch.case(epoch < warmup_epoch):
+            decayed_lr = learning_rate * (global_step /
+                                          (step_each_epoch * warmup_epoch))
+            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+        with switch.default():
+            decayed_lr = learning_rate * \
+                (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
+            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
 
     return lr
 
@@ -95,19 +92,18 @@ def exponential_decay_with_warmup(learning_rate,
     warmup_epoch = fluid.layers.fill_constant(
         shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < warmup_epoch):
-                decayed_lr = learning_rate * (global_step /
-                                              (step_each_epoch * warmup_epoch))
-                fluid.layers.assign(input=decayed_lr, output=lr)
-            with switch.default():
-                div_res = (global_step - warmup_epoch * step_each_epoch
-                           ) / decay_epochs
-                div_res = ops.floor(div_res)
-                decayed_lr = learning_rate * (decay_rate**div_res)
-                fluid.layers.assign(input=decayed_lr, output=lr)
+    epoch = ops.floor(global_step / step_each_epoch)
+    with fluid.layers.control_flow.Switch() as switch:
+        with switch.case(epoch < warmup_epoch):
+            decayed_lr = learning_rate * (global_step /
+                                          (step_each_epoch * warmup_epoch))
+            fluid.layers.assign(input=decayed_lr, output=lr)
+        with switch.default():
+            div_res = (global_step - warmup_epoch * step_each_epoch
+                       ) / decay_epochs
+            div_res = ops.floor(div_res)
+            decayed_lr = learning_rate * (decay_rate**div_res)
+            fluid.layers.assign(input=decayed_lr, output=lr)
 
     return lr
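
Note: the patch only removes the deprecated init_on_cpu context manager; the schedule math is unchanged. For reference, here is a minimal plain-Python sketch of the value the cosine_decay_with_warmup graph computes at a given step, useful for sanity-checking outside a Fluid program. The helper name cosine_warmup_lr and its defaults are illustrative, not part of the patch.

import math

def cosine_warmup_lr(base_lr, global_step, step_each_epoch,
                     epochs=120, warmup_epochs=5):
    # Hypothetical scalar equivalent of the graph built by
    # cosine_decay_with_warmup() above.
    epoch = global_step // step_each_epoch
    if epoch < warmup_epochs:
        # switch.case branch: linear warmup from 0 up to base_lr.
        return base_lr * global_step / (step_each_epoch * warmup_epochs)
    # switch.default branch: cosine decay over the remaining steps; note it
    # is driven by the raw step offset, not the floored epoch index.
    progress = (global_step - warmup_epochs * step_each_epoch) * \
        (math.pi / (epochs * step_each_epoch))
    return base_lr * (math.cos(progress) + 1) / 2

# e.g. with base_lr=0.1 and 100 steps per epoch, at step 6000 (epoch 60):
# cosine_warmup_lr(0.1, 6000, 100)  ->  ~0.0565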