diff --git a/dygraph/mobilenet/utils/optimizer.py b/dygraph/mobilenet/utils/optimizer.py
index 1501bcbe9c2e331c770cdf3d7d20ee2a8ae0a14b..e34abfe2f8287b7bf8c0ec2fe2509a013d5f9a69 100644
--- a/dygraph/mobilenet/utils/optimizer.py
+++ b/dygraph/mobilenet/utils/optimizer.py
@@ -20,7 +20,6 @@ import math
 
 import paddle.fluid as fluid
 import paddle.fluid.layers.ops as ops
-from paddle.fluid.initializer import init_on_cpu
 from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
 
 
@@ -30,10 +29,9 @@ def cosine_decay(learning_rate, step_each_epoch, epochs=120):
     """
     global_step = _decay_step_counter()
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        decayed_lr = learning_rate * \
-            (ops.cos(epoch * (math.pi / epochs)) + 1)/2
+    epoch = ops.floor(global_step / step_each_epoch)
+    decayed_lr = learning_rate * \
+        (ops.cos(epoch * (math.pi / epochs)) + 1)/2
     return decayed_lr
 
 
@@ -53,17 +51,16 @@ def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
     warmup_epoch = fluid.layers.fill_constant(
         shape=[1], dtype='float32', value=float(5), force_cpu=True)
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < warmup_epoch):
-                decayed_lr = learning_rate * (global_step /
-                                              (step_each_epoch * warmup_epoch))
-                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
-            with switch.default():
-                decayed_lr = learning_rate * \
-                    (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
-                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+    epoch = ops.floor(global_step / step_each_epoch)
+    with fluid.layers.control_flow.Switch() as switch:
+        with switch.case(epoch < warmup_epoch):
+            decayed_lr = learning_rate * (global_step /
+                                          (step_each_epoch * warmup_epoch))
+            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+        with switch.default():
+            decayed_lr = learning_rate * \
+                (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
+            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
 
     return lr
 
@@ -85,19 +82,18 @@ def exponential_decay_with_warmup(learning_rate,
     warmup_epoch = fluid.layers.fill_constant(
         shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < warmup_epoch):
-                decayed_lr = learning_rate * (global_step /
-                                              (step_each_epoch * warmup_epoch))
-                fluid.layers.assign(input=decayed_lr, output=lr)
-            with switch.default():
-                div_res = (
-                    global_step - warmup_epoch * step_each_epoch) / decay_epochs
-                div_res = ops.floor(div_res)
-                decayed_lr = learning_rate * (decay_rate**div_res)
-                fluid.layers.assign(input=decayed_lr, output=lr)
+    epoch = ops.floor(global_step / step_each_epoch)
+    with fluid.layers.control_flow.Switch() as switch:
+        with switch.case(epoch < warmup_epoch):
+            decayed_lr = learning_rate * (global_step /
+                                          (step_each_epoch * warmup_epoch))
+            fluid.layers.assign(input=decayed_lr, output=lr)
+        with switch.default():
+            div_res = (
+                global_step - warmup_epoch * step_each_epoch) / decay_epochs
+            div_res = ops.floor(div_res)
+            decayed_lr = learning_rate * (decay_rate**div_res)
+            fluid.layers.assign(input=decayed_lr, output=lr)
 
     return lr
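
A minimal usage sketch (not part of the patch above): after this change the schedules build their ops on the default device rather than under init_on_cpu, but they are used the same way. Wiring one of them into an optimizer might look like the following; the import path, batch geometry, and Momentum/L2Decay settings are illustrative assumptions, not values taken from this repository.

    import paddle.fluid as fluid

    from utils.optimizer import cosine_decay  # assumed import path

    def build_optimizer(base_lr=0.1, step_each_epoch=500, epochs=120):
        # cosine_decay returns a learning-rate Variable that is re-evaluated
        # each step via the global step counter, so the optimizer picks up
        # the decayed value automatically.
        lr = cosine_decay(base_lr, step_each_epoch=step_each_epoch, epochs=epochs)
        return fluid.optimizer.Momentum(
            learning_rate=lr,
            momentum=0.9,  # illustrative value
            regularization=fluid.regularizer.L2Decay(1e-4))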