diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py
index 079bf865df6f0ad7d09d926ced5802df9c411d9d..dd17dbe52728bc9e8cd30cad24eb8ff3d014cd9d 100644
--- a/python/paddle/fluid/dygraph/learning_rate_scheduler.py
+++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py
@@ -25,7 +25,6 @@ __all__ = [
     'NoamDecay',
     'PiecewiseDecay',
     'PolynomialDecay',
-    'CosineDecay',
     'LinearLrWarmup',
     'ReduceLROnPlateau',
     'StepDecay',
@@ -294,69 +293,6 @@ class PolynomialDecay(LearningRateDecay):
         return decayed_lr
 
 
-class CosineDecay(LearningRateDecay):
-    r"""
-    :api_attr: imperative
-
-    Applies cosine decay to the learning rate.
-
-    The algorithm can be described as following.
-
-    .. math::
-
-        decayed\_learning\_rate = learning\_rate * 0.5 * (math.cos(global\_step * \\frac{math.pi}{step\_each\_epoch} ) + 1)
-
-    Parameters:
-        learning_rate(Variable|float): The initial learning rate. If the type
-            is Variable, it's a tensor with shape [1], the data type can be
-            float32 or float64. It also can be set to python int number.
-        step_each_epoch(int): The number of steps in an epoch.
-        epochs(int): The number of epochs.
-        begin(int, optional): The begin step. The initial value of global_step described above. The default value is 0.
-        step(int, optional): The step size used to calculate the new global_step in the description above.
-            The default value is 1.
-        dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
-            'float32', 'float64'. The default value is 'float32'.
-
-    Returns:
-        None.
-
-    Examples:
-        .. code-block:: python
-
-            base_lr = 0.1
-            with fluid.dygraph.guard():
-                optimizer = fluid.optimizer.SGD(
-                    learning_rate = fluid.dygraph.CosineDecay(
-                        base_lr, 10000, 120) )
-    """
-
-    def __init__(
-        self,
-        learning_rate,
-        step_each_epoch,
-        epochs,
-        begin=0,
-        step=1,
-        dtype='float32',
-    ):
-        super().__init__(begin, step, dtype)
-        self.learning_rate = learning_rate
-        self.step_each_epoch = step_each_epoch
-        self.epochs = epochs
-
-    def step(self):
-        cur_epoch = paddle.floor(
-            self.create_lr_var(self.step_num / self.step_each_epoch)
-        )
-        decayed_lr = (
-            self.learning_rate
-            * 0.5
-            * (paddle.cos(cur_epoch * math.pi / self.epochs) + 1)
-        )
-        return decayed_lr
-
-
 class NoamDecay(LearningRateDecay):
     r"""
     :api_attr: imperative
diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py
index 37f61d351e622d8e129a53fb7192276e973feb71..bc1c8e78038fb7b235af28a4af1da690b06b64cf 100644
--- a/python/paddle/fluid/layers/learning_rate_scheduler.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -495,8 +495,8 @@ def cosine_decay(learning_rate, step_each_epoch, epochs):
 
     with default_main_program()._lr_schedule_guard():
         if in_dygraph_mode():
-            decay = imperate_lr.CosineDecay(
-                learning_rate, step_each_epoch, epochs
+            decay = paddle.optimizer.lr.CosineAnnealingDecay(
+                learning_rate, epochs
             )
             return decay
         else:
diff --git a/test/dygraph_to_static/test_basic_api_transformation.py b/test/dygraph_to_static/test_basic_api_transformation.py
index 0abe6bcb5194a7d448c41b6de520105e2a763ae8..62499a9aadaaa83b8e5dbb3c80ddf8da706e0493 100644
--- a/test/dygraph_to_static/test_basic_api_transformation.py
+++ b/test/dygraph_to_static/test_basic_api_transformation.py
@@ -333,11 +333,11 @@ class TestDygraphBasicApi_Prelu(TestDygraphBasicApi):
 # 2. test Apis that inherit from LearningRateDecay
 def dyfunc_CosineDecay():
     base_lr = 0.1
-    CosineDecay = fluid.dygraph.CosineDecay(
-        learning_rate=base_lr, step_each_epoch=10000, epochs=120
+    CosineDecay = paddle.optimizer.lr.CosineAnnealingDecay(
+        learning_rate=base_lr, T_max=120
     )
     lr = CosineDecay()
-    return lr
+    return paddle.to_tensor(lr)
 
 
 def dyfunc_ExponentialDecay():
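
For downstream code that still constructs the removed scheduler, a minimal migration sketch follows. The Linear layer and SGD optimizer below are hypothetical illustrations, not part of this PR; the removed fluid.dygraph.CosineDecay was advanced per iteration and derived the epoch from step_each_epoch, while paddle.optimizer.lr.CosineAnnealingDecay is stepped explicitly (typically once per epoch) and only needs T_max.

    import paddle

    base_lr = 0.1
    # Replaces fluid.dygraph.CosineDecay(base_lr, 10000, 120); step_each_epoch is
    # no longer passed because the schedule is advanced manually via step().
    scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=base_lr, T_max=120)

    linear = paddle.nn.Linear(10, 10)  # hypothetical model, only for illustration
    sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())

    for epoch in range(120):
        # ... run the training iterations for this epoch ...
        scheduler.step()  # advance the cosine schedule by one epoch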