diff --git a/dygraph/mobilenet/utils/optimizer.py b/dygraph/mobilenet/utils/optimizer.py
index 1501bcbe9c2e331c770cdf3d7d20ee2a8ae0a14b..e34abfe2f8287b7bf8c0ec2fe2509a013d5f9a69 100644
--- a/dygraph/mobilenet/utils/optimizer.py
+++ b/dygraph/mobilenet/utils/optimizer.py
@@ -20,7 +20,6 @@ import math
 
 import paddle.fluid as fluid
 import paddle.fluid.layers.ops as ops
-from paddle.fluid.initializer import init_on_cpu
 from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
 
 
@@ -30,10 +29,9 @@ def cosine_decay(learning_rate, step_each_epoch, epochs=120):
     """
     global_step = _decay_step_counter()
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        decayed_lr = learning_rate * \
-            (ops.cos(epoch * (math.pi / epochs)) + 1)/2
+    epoch = ops.floor(global_step / step_each_epoch)
+    decayed_lr = learning_rate * \
+        (ops.cos(epoch * (math.pi / epochs)) + 1)/2
     return decayed_lr
 
 
@@ -53,17 +51,16 @@ def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
     warmup_epoch = fluid.layers.fill_constant(
         shape=[1], dtype='float32', value=float(5), force_cpu=True)
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < warmup_epoch):
-                decayed_lr = learning_rate * (global_step /
-                                              (step_each_epoch * warmup_epoch))
-                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
-            with switch.default():
-                decayed_lr = learning_rate * \
-                    (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
-                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+    epoch = ops.floor(global_step / step_each_epoch)
+    with fluid.layers.control_flow.Switch() as switch:
+        with switch.case(epoch < warmup_epoch):
+            decayed_lr = learning_rate * (global_step /
+                                          (step_each_epoch * warmup_epoch))
+            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+        with switch.default():
+            decayed_lr = learning_rate * \
+                (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
+            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
 
     return lr
 
@@ -85,19 +82,18 @@ def exponential_decay_with_warmup(learning_rate,
     warmup_epoch = fluid.layers.fill_constant(
         shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < warmup_epoch):
-                decayed_lr = learning_rate * (global_step /
-                                              (step_each_epoch * warmup_epoch))
-                fluid.layers.assign(input=decayed_lr, output=lr)
-            with switch.default():
-                div_res = (
-                    global_step - warmup_epoch * step_each_epoch) / decay_epochs
-                div_res = ops.floor(div_res)
-                decayed_lr = learning_rate * (decay_rate**div_res)
-                fluid.layers.assign(input=decayed_lr, output=lr)
+    epoch = ops.floor(global_step / step_each_epoch)
+    with fluid.layers.control_flow.Switch() as switch:
+        with switch.case(epoch < warmup_epoch):
+            decayed_lr = learning_rate * (global_step /
+                                          (step_each_epoch * warmup_epoch))
+            fluid.layers.assign(input=decayed_lr, output=lr)
+        with switch.default():
+            div_res = (
+                global_step - warmup_epoch * step_each_epoch) / decay_epochs
+            div_res = ops.floor(div_res)
+            decayed_lr = learning_rate * (decay_rate**div_res)
+            fluid.layers.assign(input=decayed_lr, output=lr)
 
     return lr
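
A minimal usage sketch (not part of the patch above): after this change the schedules build their ops on the default device rather than under init_on_cpu, but they are used the same way. Wiring one of them into an optimizer might look like the following; the import path, batch geometry, and Momentum/L2Decay settings are illustrative assumptions, not values taken from this repository.

    import paddle.fluid as fluid

    from utils.optimizer import cosine_decay  # assumed import path

    def build_optimizer(base_lr=0.1, step_each_epoch=500, epochs=120):
        # cosine_decay returns a learning-rate Variable that is re-evaluated
        # each step via the global step counter, so the optimizer picks up
        # the decayed value automatically.
        lr = cosine_decay(base_lr, step_each_epoch=step_each_epoch, epochs=epochs)
        return fluid.optimizer.Momentum(
            learning_rate=lr,
            momentum=0.9,  # illustrative value
            regularization=fluid.regularizer.L2Decay(1e-4))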