diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py
index 4c1996331cab09cdd1454b75aa04e5dd7bb3a661..378aeb37605f1971da3fe4a926e4b36b8eae2ca4 100644
--- a/python/paddle/fluid/layers/learning_rate_scheduler.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -313,9 +313,11 @@ def cosine_decay(learning_rate, step_each_epoch, epochs):
     """
     Applies cosine decay to the learning rate.
 
-    when training a model, it is oftem recommended to lower the learning rate as the
+    When training a model, it is often recommended to lower the learning rate as the
     training progresses. By using this function, the learning rate will be decayed by
     following cosine decay strategy.
+
+    decayed_lr = learning_rate * 0.5 * (math.cos(epoch * math.pi / epochs) + 1)
     
     Args:
         learning_rate(Variable|float): The initial learning rate.