diff --git a/doc/ui/api/trainer_config_helpers/layers.rst b/doc/ui/api/trainer_config_helpers/layers.rst
index 8d297b0cf23acc691718890a47da38f4eff00d0c..4a02af396993207d305be488c993ce94cf20fe1d 100644
--- a/doc/ui/api/trainer_config_helpers/layers.rst
+++ b/doc/ui/api/trainer_config_helpers/layers.rst
@@ -191,6 +191,12 @@ embedding_layer
    :members: embedding_layer
    :noindex:
 
+scaling_projection
+------------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: scaling_projection
+    :noindex:
+
 dotmul_projection
 -----------------
 .. automodule:: paddle.trainer_config_helpers.layers
diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp
index 2f85dd3c3b69d21cffede49b001298c6629900a6..3c2df52fed4f86675ce8f1ead6a3b66e4babde34 100644
--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
@@ -605,7 +605,7 @@ public:
     int batchSize = input->getHeight();
     int size = 1;
     resizeOutput(batchSize, size);
-    output_.value->sumRows(*input);
+    output_.value->sumRows(*input, /* scaleSum= */1, /* scaleDest= */0);
   }
 
   virtual void backward(const UpdateCallback& callback = nullptr) {
diff --git a/paddle/gserver/layers/FullMatrixProjection.cpp b/paddle/gserver/layers/FullMatrixProjection.cpp
index 8241cbd37ec623622f19ff2ba35c21a4e3e3533a..f17c1b05bd892c7d933e4910887f977ac5cda79b 100644
--- a/paddle/gserver/layers/FullMatrixProjection.cpp
+++ b/paddle/gserver/layers/FullMatrixProjection.cpp
@@ -52,7 +52,9 @@ void FullMatrixProjection::backward(const UpdateCallback& callback) {
   }
 
   hl_set_sync_flag(syncFlag);
-  parameter_->incUpdate(callback);
+  if (weight_->getWGrad()) {
+    parameter_->incUpdate(callback);
+  }
 }
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/ScalingProjection.cpp b/paddle/gserver/layers/ScalingProjection.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c0a7072c6a7cc1d37723f43d1068483779f56437
--- /dev/null
+++ b/paddle/gserver/layers/ScalingProjection.cpp
@@ -0,0 +1,53 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Projection.h"
+
+namespace paddle {
+
+class ScalingProjection : public Projection {
+public:
+  ScalingProjection(const ProjectionConfig& config,
+                    const ParameterPtr& parameter, bool useGpu)
+      : Projection(config, parameter, useGpu) {
+    CHECK_EQ(parameter->getSize(), 1UL);
+    weight_.reset(new Weight(1, 1, parameter));
+  }
+
+  void forward() {
+    CHECK(in_->value);
+    out_->value->add(*in_->value, weight_->getW()->getElement(0, 0));
+  }
+
+  void backward(const UpdateCallback& callback) {
+    if (weight_->getWGrad()) {
+      auto sum = Matrix::create(in_->value->getHeight(), 1, false, useGpu_);
+      sum->sumOfProducts(*in_->value, *out_->grad,
+                         /* scaleSum= */1, /* scaleDest= */0);
+      weight_->getWGrad()->sumCols(*sum,
+                                   /* scaleSum= */1, /* scaleDest= */1);
+      parameter_->incUpdate(callback);
+    }
+    if (in_->grad) {
+      in_->grad->add(*out_->grad, weight_->getW()->getElement(0, 0));
+    }
+  }
+
+protected:
+  std::unique_ptr<Weight> weight_;
+};
+
+REGISTER_PROJECTION(scaling, ScalingProjection);
+
+}  // namespace paddle
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index f3cd2b4faf0c173cbb4997aac1a00ebba3027c92..a79dfe39c9bb26c7b2acec1051699e1804494d93 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -135,6 +135,17 @@ TEST(Projection, identity) {
   }
 }
 
+TEST(Projection, scaling) {
+  ProjectionConfig conf;
+  conf.set_type("scaling");
+  conf.set_input_size(10);
+  conf.set_output_size(10);
+  for (auto useGpu : {false}) {
+    testProjectionGrad(conf, INPUT_DATA, /* parameterSize */ 1,
+                       /* batchSize */ 100, useGpu);
+  }
+}
+
 #ifndef PADDLE_ONLY_CPU
 TEST(Projection, conv) {
   const int NUM_FILTERS = 16;
diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu
index d81b99e5441584b21fb023dcae65ccec7dd27996..54448bdb5a9bb4f665f28f973eada30a07fb5eee 100644
--- a/paddle/math/BaseMatrix.cu
+++ b/paddle/math/BaseMatrix.cu
@@ -1451,6 +1451,8 @@ int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(height_, numRows);
+  CHECK_EQ(width_, 1UL);
   aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows,
             numCols, offset, false_type(), true_type() /*aAsColVector*/);
 
@@ -1463,18 +1465,69 @@ int BaseMatrixT<real>::applyRow(Agg agg, Saver sv, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(height_, numRows);
+  CHECK_EQ(width_, 1UL);
   aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, offset,
             false_type(), true_type() /*aAsColVector*/);
 
   return 0;
 }
 
+template<>
+template <class Agg>
+int BaseMatrixT<real>::applyRow(
+    Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) {
+  if (scaleDest != 0) {
+    applyRow(agg, base::binary::add2(scaleDest, scaleAgg), b);
+  } else {
+    applyRow(agg, base::binary::second(), b);
+    if (scaleAgg != 1) {
+      mulScalar(scaleAgg);
+    }
+  }
+  return 0;
+}
+
+template<>
+template <class Agg, class Op, class Saver>
+int BaseMatrixT<real>::applyRow(Agg agg, Op op, Saver sv,
+                                BaseMatrixT& b, BaseMatrixT& c) {
+  MatrixOffset offset(0, 0, 0, 0, 0, 0);
+  int numRows = b.height_;
+  int numCols = b.width_;
+  CHECK_EQ(height_, numRows);
+  CHECK_EQ(width_, 1UL);
+  CHECK_EQ(c.height_, numRows);
+  CHECK_EQ(c.width_, numCols);
+  aggregate(agg, op, sv,
+            b, c, numRows, numCols, offset,
+            false_type(), true_type() /*aAsColVector*/);
+  return 0;
+}
+
+template<>
+template <class Agg, class Op>
+int BaseMatrixT<real>::applyRow(Agg agg, Op op, real scaleDest, real scaleAgg,
+                                BaseMatrixT& b, BaseMatrixT& c) {
+  if (scaleDest != 0) {
+    applyRow(agg, op, base::binary::add2(scaleDest, scaleAgg), b, c);
+  } else {
+    applyRow(agg, op, base::binary::second(), b, c);
+    if (scaleAgg != 1) {
+      mulScalar(scaleAgg);
+    }
+  }
+  return 0;
+}
+
 template<>
 template <class Agg>
 int BaseMatrixT<real>::applyCol(Agg agg, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(width_, numCols);
+  CHECK_EQ(height_, 1UL);
   aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows,
             numCols, offset, true_type() /*aAsRowVector*/, false_type());
 
@@ -1487,6 +1540,8 @@ int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(width_, numCols);
+  CHECK_EQ(height_, 1UL);
   aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, offset,
             true_type() /*aAsRowVector*/, false_type());
 
@@ -1494,8 +1549,23 @@ int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
 }
 
 template<>
-void BaseMatrixT<real>::sumRows(BaseMatrixT& b) {
-  applyRow(aggregate::sum(), b);
+template <class Agg>
+int BaseMatrixT<real>::applyCol(
+    Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) {
+  if (scaleDest != 0) {
+    applyCol(agg, base::binary::add2(scaleDest, scaleAgg), b);
+  } else {
+    applyCol(agg, base::binary::second(), b);
+    if (scaleAgg != 1) {
+      mulScalar(scaleAgg);
+    }
+  }
+  return 0;
+}
+
+template<>
+void BaseMatrixT<real>::sumRows(BaseMatrixT& b, real scaleSum, real scaleDest) {
+  applyRow(aggregate::sum(), scaleDest, scaleSum, b);
 }
 
 template<>
@@ -1524,18 +1594,22 @@ void BaseMatrixT<real>::minCols(BaseMatrixT& b) {
 }
 
 template<>
-void BaseMatrixT<real>::sumCols(BaseMatrixT& b, real scale) {
-  applyCol(aggregate::sum(), base::binary::add2(1.0, scale), b);
+void BaseMatrixT<real>::sumCols(BaseMatrixT& b, real scaleSum, real scaleDest) {
+  applyCol(aggregate::sum(), scaleDest, scaleSum, b);
 }
 
 template<>
-void BaseMatrixT<real>::sumOfSquares(BaseMatrixT& b, BaseMatrixT& c) {
-  int numRows = b.height_;
-  int numCols = b.width_;
-  MatrixOffset offset(0, 0, 0, 0, 0, 0);
-  aggregate(aggregate::sum(), base::binary::squaredDiff(), base::binary::add(),
-            b, c, numRows, numCols, offset, false_type(),
-            true_type() /*aAsColVector*/);
+void BaseMatrixT<real>::sumOfSquaredDiffs(
+    BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) {
+  applyRow(aggregate::sum(), base::binary::squaredDiff(),
+           scaleDest, scaleSum, b, c);
+}
+
+template<>
+void BaseMatrixT<real>::sumOfProducts(
+    BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) {
+  applyRow(aggregate::sum(), base::binary::mul(),
+           scaleDest, scaleSum, b, c);
 }
 
 template class BaseMatrixT<real>;
diff --git a/paddle/math/BaseMatrix.h b/paddle/math/BaseMatrix.h
index 2dd2c2c7a9b985924d53cb3bf8840eb1e55eee3e..3a91fdc3c30c5332866a97c256b018eb0982260f 100644
--- a/paddle/math/BaseMatrix.h
+++ b/paddle/math/BaseMatrix.h
@@ -305,6 +305,23 @@ public:
   template <class Agg>
   int applyRow(Agg agg, BaseMatrixT& b);
 
+  /**
+   * an aggregate expression that applies to each row of matrix b.
+   *
+   * @code
+   * for each row i & 0 <= j < b.width_, do:
+   *   dst = agg(op(b[i*ldb + j], c[i*ldc + j]))
+   *   this[i] = sv(this[i], dst)
+   * @endcode
+   */
+  template <class Agg, class Op, class Saver>
+  int applyRow(Agg agg, Op op, Saver sv, BaseMatrixT& b, BaseMatrixT& c);
+
+  // Same as the above with the special handling of sv=add2(scaleDest, scaleAgg)
+  template <class Agg, class Op>
+  int applyRow(Agg agg, Op op, real scaleDest, real scaleAgg,
+               BaseMatrixT& b, BaseMatrixT& c);
+
   /**
    * a aggregate expression that apply each row of matrix b.
    *
@@ -317,6 +334,10 @@ public:
   template <class Agg, class Saver>
   int applyRow(Agg agg, Saver sv, BaseMatrixT& b);
 
+  // Same as the above with the special handling of sv=add2(scaleDest, scaleAgg)
+  template <class Agg>
+  int applyRow(Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b);
+
   /**
    * a aggregate expression that apply each column of matrix b.
    *
@@ -340,6 +361,10 @@ public:
   template <class Agg, class Saver>
   int applyCol(Agg agg, Saver sv, BaseMatrixT& b);
 
+  // Same as the above with the special handling of sv=add2(scaleDest, scaleAgg)
+  template <class Agg>
+  int applyCol(Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b);
+
   bool useGpu() const { return useGpu_; }
 
   const T* rowBuf(size_t row) const { return data_ + width_ * row; }
@@ -920,7 +945,9 @@ public:
   void addRowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c);
 
   /// calculate the sum of each row of the matrix b.
-  void sumRows(BaseMatrixT& b);
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij}
+  void sumRows(BaseMatrixT& b, T scaleSum, T scaleDest);
+
   /// calculate the maximum value of each row of the matrix b.
   void maxRows(BaseMatrixT& b);
   /// calculate the minimum value of each row of the matrix b.
@@ -932,10 +959,18 @@ public:
   void maxCols(BaseMatrixT& b);
   /// calculate the minimum value of each column of the matrix b.
   void minCols(BaseMatrixT& b);
-  void sumCols(BaseMatrixT& b, T scale);
-  /// calculate the sum of each row of (b - c)^2.
-  void sumOfSquares(BaseMatrixT& b, BaseMatrixT& c);
+
+  /// calculate the sum of each column of the matrix b.
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ji}
+  void sumCols(BaseMatrixT& b, T scaleSum, T scaleDest);
+
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j (b_{ij} - c_{ij})^2
+  void sumOfSquaredDiffs(BaseMatrixT& b, BaseMatrixT& c,
+                         T scaleSum, T scaleDest);
+
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} * c_{ij}
+  void sumOfProducts(BaseMatrixT& b, BaseMatrixT& c,
+                     T scaleSum, T scaleDest);
 
   /**
    * @code
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 5ee8fbebfcfbe9696f7836b6b1c88e724551da8e..706a598d0c33762b0578190ea4a0aa06247a88ef 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -242,7 +242,7 @@ real GpuMatrix::getSum() {
 void GpuMatrix::accumulateColSum(Matrix& src) {
   CHECK_EQ(getWidth(), src.getWidth());
   CHECK_EQ(getHeight(), (size_t)1);
-  sumCols(src, 1.0);
+  sumCols(src, 1.0, 1.0);
 }
 
 real GpuMatrix::getAbsSum() {
@@ -389,7 +389,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) {
   CHECK_EQ(width_, a.getWidth());
   GpuSparseMatrix* sMatPtr = dynamic_cast<GpuSparseMatrix*>(&a);
   if (!sMatPtr) {
-    sumCols(a, scale);
+    sumCols(a, /* scaleSum= */scale, /* scaleDest= */1);
   } else {
     real* data = getData();
     hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get();
@@ -589,7 +589,7 @@ void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
 void GpuMatrix::colMerge(Matrix& src) {
   CHECK(src.height_ == height_);
   if (!trans_ && !src.trans_) {
-    sumRows(src);
+    sumRows(src, /* scaleSum= */1, /* scaleDest= */0);
   } else {
     LOG(FATAL) << "Is not supported";
   }
@@ -599,7 +599,7 @@ void GpuMatrix::rowSum(Matrix& sum) {
   CHECK_EQ(sum.getHeight(), getHeight());
   CHECK_EQ(sum.getWidth(), (size_t)1);
 
-  sum.sumRows(*this);
+  sum.sumRows(*this, /* scaleSum= */1, /* scaleDest= */0);
 }
 
 void GpuMatrix::rowMax(Matrix& max) {
@@ -790,7 +790,8 @@ void GpuMatrix::sumOfSquares(Matrix& output, Matrix& label) {
     LOG(FATAL) << "not supported: GpuSparseMatrix as label";
   }
 
-  BaseMatrix::sumOfSquares(output, label);
+  BaseMatrix::sumOfSquaredDiffs(output, label,
+                                /* scaleSum= */1, /* scaleDest= */1);
 }
 
 void GpuMatrix::sumOfSquaresBp(Matrix& outputV, Matrix& label) {
@@ -1501,7 +1502,7 @@ void CpuMatrix::accumulateColSum(Matrix& src) {
   CHECK_EQ(getWidth(), src.getWidth());
   CHECK_EQ(getHeight(), (size_t)1);
-  sumCols(src, 1.0);
+  sumCols(src, /* scaleSum= */1, /* scaleDest= */1);
 }
 
 real CpuMatrix::getAbsSum() {
@@ -2188,7 +2189,7 @@ void CpuMatrix::collectBias(Matrix& a, real scale) {
   CHECK_EQ(width_, a.getWidth());
   CpuSparseMatrix* aptr = dynamic_cast<CpuSparseMatrix*>(&a);
   if (!aptr) {
-    sumCols(a, scale);
+    sumCols(a, /* scaleSum= */scale, /* scaleDest= */1);
   } else {
     size_t nnz = aptr->getElementCnt();
     int* cols = aptr->getCols();
@@ -2227,7 +2228,7 @@ void CpuMatrix::sequenceAvgForward(Matrix& a,
   real* dst = getData();
   real* src = a.getData();
   const int* starts = startsPos.getData();
-  MatrixPtr outMtx = Matrix::create(1, 1, false, false);
+  MatrixPtr outMtx = Matrix::create(nullptr, 1, width, false, false);
   MatrixPtr dataMtx = Matrix::create(nullptr, 1, width, false, false);
   for (size_t i = 0; i < height; i++) {
     int sequenceLength = starts[i + 1] - starts[i];
@@ -2239,13 +2240,15 @@ void CpuMatrix::sequenceAvgForward(Matrix& a,
     dataMtx->setData(src + starts[i] * width, sequenceLength, width);
     if (mode == 0) {
       // plain average
-      outMtx->sumCols(*dataMtx, (real)1 / (real)sequenceLength);
+      outMtx->sumCols(*dataMtx, (real)1 / (real)sequenceLength,
+                      /* scaleDest= */1);
     } else if (mode == 1) {
       // sum instead of average
-      outMtx->sumCols(*dataMtx, (real)1);
+      outMtx->sumCols(*dataMtx, /* scaleSum= */1, /* scaleDest= */1);
     } else if (mode == 2) {
       // divide by square root of sequenceLength
-      outMtx->sumCols(*dataMtx, (real)1 / std::sqrt(sequenceLength));
+      outMtx->sumCols(*dataMtx, (real)1 / std::sqrt(sequenceLength),
+                      /* scaleDest= */1);
     } else {
       LOG(FATAL) << "should not reach here";
     }
@@ -2932,7 +2935,7 @@ void CpuMatrix::rowSum(Matrix& sum) {
   CHECK_EQ(sum.getHeight(), getHeight());
   CHECK_EQ(sum.getWidth(), (size_t)1);
 
-  sum.sumRows(*this);
+  sum.sumRows(*this, /* scaleSum= */1, /* scaleDest= */0);
 }
 
 void CpuMatrix::rowMaxId(IVector& maxIds) {
@@ -3485,7 +3488,8 @@ void CpuMatrix::sumOfSquares(Matrix& output, Matrix& label) {
     }
   }
 
-  BaseMatrix::sumOfSquares(output, label);
+  BaseMatrix::sumOfSquaredDiffs(output, label,
+                                /* scaleSum= */1, /* scaleDest= */1);
 }
 
 /* calculate the error of outputV according to label */
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 881f0b821491be12e57be0fef04a38fc95fca4eb..3e55a9f9f565a7719fb20b3a5dda6b61e8961d5b 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -592,6 +592,20 @@ class DotMulProjection(Projection):
     def calc_parameter_dims(self, input_size, output_size):
         return [1, output_size]
 
+# ScalingProjection
+@config_class
+class ScalingProjection(Projection):
+    type = 'scaling'
+
+    def calc_output_size(self, input_layer_config):
+        return input_layer_config.size
+
+    def calc_parameter_size(self, input_size, output_size):
+        return 1
+
+    def calc_parameter_dims(self, input_size, output_size):
+        return [1, 1]
+
 @config_class
 class TableProjection(Projection):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 796121a64136ee3f31b2ed09b761c6a83cdbe625..b5e10ef81009a00e76b0c4147b404ba0aaba72b3 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -65,6 +65,7 @@ __all__ = [
     'StaticInput',
     'expand_layer',
     'scaling_layer',
+    'scaling_projection',
     'power_layer',
     'interpolation_layer',
     'bilinear_interp_layer',
@@ -458,7 +459,7 @@ def identity_projection(input, offset=None):
     :type input: LayerOutput
     :param offset: Offset, None if use default.
     :type offset: int
-    :return: A IdentityProjection or IdentityOffsetProjection Object
+    :return: A IdentityProjection or IdentityOffsetProjection object
     :rtype: IdentityProjection or IdentityOffsetProjection
     """
     if offset is None:
@@ -471,6 +472,34 @@ def identity_projection(input, offset=None):
     return proj
 
 
+@wrap_param_attr_default()
+def scaling_projection(input, param_attr=None):
+    """
+    scaling_projection multiplies the input by a scalar parameter and adds it
+    to the output.
+
+    .. math::
+       out += w * in
+
+    The example usage is:
+
+    .. code-block:: python
+
+       proj = scaling_projection(input=layer)
+
+    :param input: Input Layer.
+    :type input: LayerOutput
+    :param param_attr: Parameter config, None if use default.
+    :type param_attr: ParameterAttribute
+    :return: A ScalingProjection object
+    :rtype: ScalingProjection
+    """
+    proj = ScalingProjection(input_layer_name=input.name,
+                             **param_attr.attr)
+    proj.origin = input
+    return proj
+
+
 @wrap_param_attr_default()
 def dotmul_projection(input, param_attr=None):
     """
@@ -1426,11 +1455,11 @@ def bilinear_interp_layer(input,
     .. code-block:: python
 
        bilinear = bilinear_interp_layer(input=layer1, out_size_x=64, out_size_y=64)
-    
+
     :param input: A input layer.
     :type input: LayerOutput.
     :param out_size_x: bilinear interpolation output width.
-    :type out_size_x: int|None 
+    :type out_size_x: int|None
     :param out_size_y: bilinear interpolation output height.
     :type out_size_y: int|None
     :param name: The layer's name, which cna not be specified.
@@ -1742,11 +1771,11 @@ def img_conv_layer(input,
 
     The details of convolution layer, please refer UFLDL's `convolution `_ .
-    
-    Convolution Transpose (deconv) layer for image. Paddle only support square
+
+    Convolution Transpose (deconv) layer for image. Paddle only support square
     input currently and thus input image's width equals height.
-    The details of convolution transpose layer, 
+    The details of convolution transpose layer,
     please refer to the following explanation and references therein `_ .
 
@@ -4392,7 +4421,7 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
 
     .. code-block:: python
 
-       cost = cross_entropy(input=input_layer, 
+       cost = cross_entropy(input=input_layer,
                             label=label_layer)
 
     :param input: The first input layer.
@@ -4432,7 +4461,7 @@ def cross_entropy_with_selfnorm(input,
 
     .. code-block:: python
 
-       cost = cross_entropy_with_selfnorm(input=input_layer, 
+       cost = cross_entropy_with_selfnorm(input=input_layer,
                                           label=label_layer)
 
     :param input: The first input layer.
@@ -4502,7 +4531,7 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
 
     .. code-block:: python
 
-       cost = huber_cost(input=input_layer, 
+       cost = huber_cost(input=input_layer,
                          label=label_layer)
 
     :param input: The first input layer.
@@ -4542,7 +4571,7 @@ def multi_binary_label_cross_entropy(input,
 
     .. code-block:: python
 
-       cost = multi_binary_label_cross_entropy(input=input_layer, 
+       cost = multi_binary_label_cross_entropy(input=input_layer,
                                                label=label_layer)
 
     :param input: The first input layer.
diff --git a/python/paddle/trainer_config_helpers/tests/configs/projections.py b/python/paddle/trainer_config_helpers/tests/configs/projections.py
index 19ac6ec9061d67fa73e10e95e15fde5322b00503..aa4521dcd5db3f845871cfaaedb02a86bcbddc38 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/projections.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/projections.py
@@ -26,6 +26,7 @@ with mixed_layer() as m5:
 
 with mixed_layer() as m6:
     m6 += dotmul_operator(a=m3, b=m4)
+    m6 += scaling_projection(m3)
 
 img = data_layer(name='img', size=32 * 32)
 flt = data_layer(name='filter', size=3 * 3 * 1 * 64)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
index e47e531a2223ddaa9dd1dfaf1fcee8a11008cbbd..2b3951c242411e0c0990a52bcb2ae6b1723a9367 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
@@ -111,13 +111,23 @@ layers {
   inputs {
     input_layer_name: "__mixed_2__"
   }
+  inputs {
+    input_layer_name: "__mixed_2__"
+    input_parameter_name: "___mixed_5__.w1"
+    proj_conf {
+      type: "scaling"
+      name: "___mixed_5__.w1"
+      input_size: 100
+      output_size: 100
+    }
+  }
   inputs {
     input_layer_name: "__mixed_3__"
   }
   operator_confs {
     type: "dot_mul"
     input_indices: 0
-    input_indices: 1
+    input_indices: 2
     input_sizes: 100
     input_sizes: 100
     output_size: 100
@@ -258,6 +268,16 @@ parameters {
   initial_strategy: 0
   initial_smart: false
 }
+parameters {
+  name: "___mixed_5__.w1"
+  size: 1
+  initial_mean: 0.0
+  initial_std: 1.0
+  dims: 1
+  dims: 1
+  initial_strategy: 0
+  initial_smart: true
+}
 parameters {
   name: "___mixed_7__.w0"
   size: 30000
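A minimal usage sketch of the new scaling_projection inside a mixed_layer config. The 100-dimensional data layer and the accompanying full_matrix_projection are illustrative assumptions, not part of this change; the scaling_projection call itself follows the helper added above.

    from paddle.trainer_config_helpers import *

    # Hypothetical 100-dimensional input; any LayerOutput can be used the same way.
    din = data_layer(name='data', size=100)

    with mixed_layer(size=100) as out:
        # ordinary learned projection (illustrative)
        out += full_matrix_projection(input=din)
        # learns a single scalar w and adds w * din to the mixed layer's output
        out += scaling_projection(input=din)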