diff --git a/demo/optimizer.py b/demo/optimizer.py
index 0f0c57985f839097e9e1ae4643ba2e5a2fb64698..6b8962749b6f5000fadc67356dbb302b57d4c3e7 100644
--- a/demo/optimizer.py
+++ b/demo/optimizer.py
@@ -20,7 +20,6 @@ import math
 
 import paddle.fluid as fluid
 import paddle.fluid.layers.ops as ops
-from paddle.fluid.initializer import init_on_cpu
 from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
 
 lr_strategy = 'cosine_decay'
@@ -40,10 +39,9 @@ def cosine_decay(learning_rate, step_each_epoch, epochs=120):
     """
     global_step = _decay_step_counter()
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        decayed_lr = learning_rate * \
-            (ops.cos(epoch * (math.pi / epochs)) + 1)/2
+    epoch = ops.floor(global_step / step_each_epoch)
+    decayed_lr = learning_rate * \
+        (ops.cos(epoch * (math.pi / epochs)) + 1)/2
     return decayed_lr
 
 
@@ -63,17 +61,16 @@ def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
     warmup_epoch = fluid.layers.fill_constant(
         shape=[1], dtype='float32', value=float(5), force_cpu=True)
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < warmup_epoch):
-                decayed_lr = learning_rate * (global_step /
-                                              (step_each_epoch * warmup_epoch))
-                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
-            with switch.default():
-                decayed_lr = learning_rate * \
-                    (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
-                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+    epoch = ops.floor(global_step / step_each_epoch)
+    with fluid.layers.control_flow.Switch() as switch:
+        with switch.case(epoch < warmup_epoch):
+            decayed_lr = learning_rate * (global_step /
+                                          (step_each_epoch * warmup_epoch))
+            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+        with switch.default():
+            decayed_lr = learning_rate * \
+                (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
+            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
 
     return lr
 
@@ -95,19 +92,18 @@ def exponential_decay_with_warmup(learning_rate,
     warmup_epoch = fluid.layers.fill_constant(
         shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)
 
-    with init_on_cpu():
-        epoch = ops.floor(global_step / step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < warmup_epoch):
-                decayed_lr = learning_rate * (global_step /
-                                              (step_each_epoch * warmup_epoch))
-                fluid.layers.assign(input=decayed_lr, output=lr)
-            with switch.default():
-                div_res = (global_step - warmup_epoch * step_each_epoch
-                           ) / decay_epochs
-                div_res = ops.floor(div_res)
-                decayed_lr = learning_rate * (decay_rate**div_res)
-                fluid.layers.assign(input=decayed_lr, output=lr)
+    epoch = ops.floor(global_step / step_each_epoch)
+    with fluid.layers.control_flow.Switch() as switch:
+        with switch.case(epoch < warmup_epoch):
+            decayed_lr = learning_rate * (global_step /
+                                          (step_each_epoch * warmup_epoch))
+            fluid.layers.assign(input=decayed_lr, output=lr)
+        with switch.default():
+            div_res = (global_step - warmup_epoch * step_each_epoch
+                       ) / decay_epochs
+            div_res = ops.floor(div_res)
+            decayed_lr = learning_rate * (decay_rate**div_res)
+            fluid.layers.assign(input=decayed_lr, output=lr)
 
     return lr
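
Note: the patch only removes the deprecated init_on_cpu context manager; the schedule math is unchanged. For reference, here is a minimal plain-Python sketch of the value the cosine_decay_with_warmup graph computes at a given step, useful for sanity-checking outside a Fluid program. The helper name cosine_warmup_lr and its defaults are illustrative, not part of the patch.

import math

def cosine_warmup_lr(base_lr, global_step, step_each_epoch,
                     epochs=120, warmup_epochs=5):
    # Hypothetical scalar equivalent of the graph built by
    # cosine_decay_with_warmup() above.
    epoch = global_step // step_each_epoch
    if epoch < warmup_epochs:
        # switch.case branch: linear warmup from 0 up to base_lr.
        return base_lr * global_step / (step_each_epoch * warmup_epochs)
    # switch.default branch: cosine decay over the remaining steps; note it
    # is driven by the raw step offset, not the floored epoch index.
    progress = (global_step - warmup_epochs * step_each_epoch) * \
        (math.pi / (epochs * step_each_epoch))
    return base_lr * (math.cos(progress) + 1) / 2

# e.g. with base_lr=0.1 and 100 steps per epoch, at step 6000 (epoch 60):
# cosine_warmup_lr(0.1, 6000, 100)  ->  ~0.0565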