diff --git a/paddle/gserver/layers/ScaleShiftLayer.cpp b/paddle/gserver/layers/ScaleShiftLayer.cpp
index 06dcb409f8c98c23a1116513f6bcbd2446b290f4..35fd038ab43a8a8b08bc328b3d1b08a7bbedd0a1 100644
--- a/paddle/gserver/layers/ScaleShiftLayer.cpp
+++ b/paddle/gserver/layers/ScaleShiftLayer.cpp
@@ -17,15 +17,15 @@ limitations under the License. */
 namespace paddle {
 
 /**
- * A layer applies a slope and an intercept to the input element-wise for
- * scaling and shifting. Noting that this layer is trainable which differs
- * from the SlopeInterceptLayer.
+ * A layer applies a linear transformation to each element in each row of
+ * the input matrix. For each element, the layer first re-scales it and then
+ * adds a bias to it.
  *
  * \f[
  *    y = wx + b
  * \f]
  *
- * Here, w is scale and b is offset, which are scalars and trainable.
+ * Here, w is the scale and b is the bias. Both w and b are trainable scalars.
  *
  */
 
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index ec3a87aa3663e4926554c16933116defc6724534..c9e3ded65cb8fc8d00d3decac6af2aff2b67a37d 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -6219,9 +6219,13 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):
 @wrap_bias_attr_default()
 def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None):
     """
-    A layer applies a slope and an intercept to the input element-wise for 
-    scaling and shifting. Noting that this layer is trainable which differs
-    from the slope_intercept_layer.
+    A layer applies a linear transformation to each element in each row of 
+    the input matrix. For each element, the layer first re-scales it and then
+    adds a bias to it.
+
+    This layer is very similar to the SlopeInterceptLayer, except that the
+    scale and bias are trainable.
+
     .. math::
 
         y = w * x + b