diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 2bd274fad2ab7eed0902ffe944c6e0670f963233..47ac601e678013aceb62005d6f25595f49673d2c 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -53,7 +53,7 @@ __all__ = [
     'cos_sim',
     'hsigmoid',
     'conv_projection',
-    'mse_cost',
+    'square_error_cost',
     'regression_cost',
     'classification_cost',
     'LayerOutput',
@@ -4238,13 +4238,18 @@ def __cost_input__(input, label, weight=None):
 
 @wrap_name_default()
 @layer_support()
-def mse_cost(input, label, weight=None, name=None, coeff=1.0, layer_attr=None):
+def square_error_cost(input,
+                      label,
+                      weight=None,
+                      name=None,
+                      coeff=1.0,
+                      layer_attr=None):
     """
-    mean squared error cost:
+    sum of square error cost:
 
     ..  math::
 
-        \\frac{1}{N}\sum_{i=1}^N(t_i-y_i)^2
+        cost = \\sum_{i=1}^N(t_i-y_i)^2
 
     :param name: layer name.
     :type name: basestring
@@ -4273,7 +4278,7 @@ def mse_cost(input, label, weight=None, name=None, coeff=1.0, layer_attr=None):
     return LayerOutput(name, LayerType.COST, parents=parents, size=1)
 
 
-regression_cost = mse_cost
+regression_cost = square_error_cost
 
 
 @wrap_name_default("cost")
@@ -5798,9 +5803,9 @@ def huber_regression_cost(input,
                           coeff=1.0,
                           layer_attr=None):
     """
-    In statistics, the Huber loss is a loss function used in robust regression, 
-    that is less sensitive to outliers in data than the squared error loss. 
-    Given a prediction f(x), a label y and :math:`\delta`, the loss function 
+    In statistics, the Huber loss is a loss function used in robust regression,
+    that is less sensitive to outliers in data than the squared error loss.
+    Given a prediction f(x), a label y and :math:`\delta`, the loss function
     is defined as:
 
     .. math:
@@ -5848,13 +5853,13 @@ def huber_classification_cost(input,
                               coeff=1.0,
                               layer_attr=None):
     """
-    For classification purposes, a variant of the Huber loss called modified Huber 
-    is sometimes used. Given a prediction f(x) (a real-valued classifier score) and 
-    a true binary class label :math:`y\in \left \{-1, 1 \right \}`, the modified Huber 
+    For classification purposes, a variant of the Huber loss called modified Huber
+    is sometimes used. Given a prediction f(x) (a real-valued classifier score) and
+    a true binary class label :math:`y\in \left \{-1, 1 \right \}`, the modified Huber
     loss is defined as:
 
     .. math:
-       loss = \max \left ( 0, 1-yf(x) \right )^2, yf(x)\geq 1 
+       loss = \max \left ( 0, 1-yf(x) \right )^2, yf(x)\geq 1
        loss = -4yf(x), \text{otherwise}
 
     The example usage is:
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr
index 96fb1d4ebde08b1bca2ffd09e8db0895842cbfd3..cec8a73db66f6091ec971527b3a42aa9e08154eb 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr
@@ -45,7 +45,7 @@ layers {
   coeff: 1.0
 }
 layers {
-  name: "__mse_cost_0__"
+  name: "__square_error_cost_0__"
   type: "square_error"
   size: 1
   active_type: ""
@@ -130,7 +130,7 @@ input_layer_names: "label"
 input_layer_names: "weight"
 input_layer_names: "multi_class_label"
 output_layer_names: "__cost_0__"
-output_layer_names: "__mse_cost_0__"
+output_layer_names: "__square_error_cost_0__"
 output_layer_names: "__nce_layer_0__"
 evaluators {
   name: "classification_error_evaluator"
@@ -146,7 +146,7 @@ sub_models {
   layer_names: "weight"
   layer_names: "__fc_layer_0__"
   layer_names: "__cost_0__"
-  layer_names: "__mse_cost_0__"
+  layer_names: "__square_error_cost_0__"
   layer_names: "multi_class_label"
   layer_names: "__nce_layer_0__"
   input_layer_names: "input"
@@ -154,7 +154,7 @@ sub_models {
   input_layer_names: "weight"
   input_layer_names: "multi_class_label"
   output_layer_names: "__cost_0__"
-  output_layer_names: "__mse_cost_0__"
+  output_layer_names: "__square_error_cost_0__"
   output_layer_names: "__nce_layer_0__"
   evaluator_names: "classification_error_evaluator"
   is_recurrent_layer_group: false
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
index c369062930e2b067ceab0dc3b25ba6c1eabe2450..caa6aaa9430ffaee7ade93ee04ec90103bf8cf43 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
@@ -10,7 +10,7 @@ fc = fc_layer(input=data, size=10, act=SoftmaxActivation())
 outputs(
     classification_cost(
         input=fc, label=lbl, weight=wt),
-    mse_cost(
+    square_error_cost(
         input=fc, label=lbl, weight=wt),
     nce_layer(
         input=fc,