From 781b85b5fc58aa45d163b0d9e78f8a757f2dbd25 Mon Sep 17 00:00:00 2001
From: Haonan
Date: Thu, 1 Dec 2016 19:49:30 -0800
Subject: [PATCH] rotate_layer and flip_layer

* added getMin and getMax for GpuMatrix
* gru_step_layer parameter name
---
 paddle/gserver/layers/TransLayer.h              |  2 +-
 paddle/gserver/tests/test_LayerGrad.cpp         | 15 +++
 paddle/math/CpuSparseMatrix.cpp                 |  2 +-
 paddle/math/CpuSparseMatrix.h                   |  2 +-
 paddle/math/Matrix.cpp                          | 58 ++++++++++-
 paddle/math/Matrix.h                            | 27 +++--
 paddle/math/SparseMatrix.cpp                    |  2 +-
 paddle/math/SparseMatrix.h                      |  2 +-
 paddle/math/tests/test_SparseMatrix.cpp         |  6 +-
 paddle/math/tests/test_matrixCompare.cpp        | 17 +++-
 proto/ModelConfig.proto                         |  8 +-
 python/paddle/trainer/config_parser.py          | 18 +++-
 python/paddle/trainer/recurrent_units.py        | 10 +-
 .../paddle/trainer_config_helpers/layers.py     | 99 +++++++++++++++++--
 .../paddle/trainer_config_helpers/networks.py   | 12 +--
 .../tests/layers_test_config.py                 |  6 +-
 16 files changed, 242 insertions(+), 44 deletions(-)
 mode change 100644 => 100755 python/paddle/trainer/recurrent_units.py
 mode change 100644 => 100755 python/paddle/trainer_config_helpers/layers.py
 mode change 100644 => 100755 python/paddle/trainer_config_helpers/networks.py

diff --git a/paddle/gserver/layers/TransLayer.h b/paddle/gserver/layers/TransLayer.h
index b43fa1ebfb..1d5a370b34 100644
--- a/paddle/gserver/layers/TransLayer.h
+++ b/paddle/gserver/layers/TransLayer.h
@@ -20,7 +20,7 @@ limitations under the License. */
 namespace paddle {
 /**
- * A layer for transposition.
+ * A layer for transposing a minibatch matrix.
  * \f[
    y = x^\mathrm{T}
  * \f]
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 813a99d8cd..f7a6c672de 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1316,6 +1316,21 @@ TEST(Layer, ResizeLayer) {
   }
 }
 
+TEST(Layer, RotateLayer) {
+  TestConfig config;
+  config.biasSize = 0;
+  config.layerConfig.set_type("rotate");
+  const int INPUT_SIZE = 64;  // height * width
+  config.layerConfig.set_size(INPUT_SIZE);
+  config.layerConfig.set_height(32);
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0});
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "rotate", 100, false, useGpu);
+  }
+}
+
 TEST(Layer, NCELayer) {
   TestConfig config;
   size_t numClasses = 4;
diff --git a/paddle/math/CpuSparseMatrix.cpp b/paddle/math/CpuSparseMatrix.cpp
index 82a482f701..bf62229c03 100644
--- a/paddle/math/CpuSparseMatrix.cpp
+++ b/paddle/math/CpuSparseMatrix.cpp
@@ -372,7 +372,7 @@ MatrixPtr CpuSparseMatrix::subMatrix(size_t startRow, size_t numRows) {
 }
 
 /* mem MUST be alloced outside (memAlloc=false) */
-void CpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
+void CpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
   CHECK(!memAlloc);
   CpuSparseMatrix* mat = dynamic_cast<CpuSparseMatrix*>(matTrans.get());
   if (format_ == SPARSE_CSR) {
diff --git a/paddle/math/CpuSparseMatrix.h b/paddle/math/CpuSparseMatrix.h
index d3e8871cb5..860cad1047 100644
--- a/paddle/math/CpuSparseMatrix.h
+++ b/paddle/math/CpuSparseMatrix.h
@@ -201,7 +201,7 @@ public:
   void zeroMem();
 
   /// mem MUST be alloced outside (memAlloc=false)
-  void transpose(MatrixPtr matTrans, bool memAlloc);
+  void transpose(MatrixPtr& matTrans, bool memAlloc);
 
   void mul(const Matrix& A, const Matrix& B, real alpha, real beta);
 
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 3ae237bc7d..3c09f010bb 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -274,6 +274,18 @@ real GpuMatrix::getSum() {
   return sum;
 }
 
+real GpuMatrix::getMin() {
+  CHECK(isContiguous());
+  auto vec = GpuVector(height_ * width_, data_);
+  return vec.getMin();
+}
+
+real GpuMatrix::getMax() {
+  CHECK(isContiguous());
+  auto vec = GpuVector(height_ * width_, data_);
+  return vec.getMax();
+}
+
 void GpuMatrix::accumulateColSum(Matrix& src) {
   CHECK_EQ(getWidth(), src.getWidth());
   CHECK_EQ(getHeight(), (size_t)1);
@@ -371,7 +383,7 @@ MatrixPtr GpuMatrix::getTranspose() {
   }
 }
 
-void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
+void GpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
   if (memAlloc) {
     matTrans = std::make_shared<GpuMatrix>(width_, height_);
   } else {
@@ -385,13 +397,29 @@ void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
   hl_matrix_transpose(data, dataTrans, height_, width_, lda, ldc);
 }
 
+void GpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
+  if (memAlloc) {
+    matRot = std::make_shared<GpuMatrix>(width_, height_);
+  } else {
+    CHECK(matRot != NULL);
+  }
+
+  MatrixPtr cpuMat = std::make_shared<CpuMatrix>(height_, width_);
+  cpuMat->copyFrom(*this);
+
+  MatrixPtr cpuMatRot = std::make_shared<CpuMatrix>(width_, height_);
+  cpuMat->rotate(cpuMatRot, false, clockWise);
+
+  matRot->copyFrom(*cpuMatRot);
+}
+
 MatrixPtr GpuMatrix::getInverse() {
   MatrixPtr matInv;
   inverse(matInv, true);
   return matInv;
 }
 
-void GpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) {
+void GpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) {
   CHECK_EQ(height_, width_);
 
   if (memAlloc) {
@@ -1690,7 +1718,7 @@ MatrixPtr CpuMatrix::getTranspose() {
   }
 }
 
-void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
+void CpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
   if (memAlloc) {
     matTrans = std::make_shared<CpuMatrix>(width_, height_);
   } else {
@@ -1708,13 +1736,35 @@ void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
   }
 }
 
+void CpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
+  if (memAlloc) {
+    matRot = std::make_shared<CpuMatrix>(width_, height_);
+  } else {
+    CHECK(matRot != NULL);
+  }
+  real* dataRot = matRot->getData();
+  real* data = getData();
+  int lda = getStride();
+  int ldc = matRot->getStride();
+
+  for (size_t i = 0; i < height_; i++) {
+    for (size_t j = 0; j < width_; j++) {
+      if (clockWise) {
+        dataRot[j * ldc + i] = data[(height_ - i - 1) * lda + j];
+      } else {
+        dataRot[j * ldc + i] = data[i * lda + (width_ - j - 1)];
+      }
+    }
+  }
+}
+
 MatrixPtr CpuMatrix::getInverse() {
   MatrixPtr matInv;
   inverse(matInv, true);
   return matInv;
 }
 
-void CpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) {
+void CpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) {
   CHECK_EQ(height_, width_);
 
   if (memAlloc) {
diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index 57c0c2fe40..d123f827c9 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -372,7 +372,17 @@ public:
    * allocate matTrans' memory outside, then set memAlloc as false;
    * else set as true.
    */
-  virtual void transpose(MatrixPtr matTrans, bool memAlloc) {
+  virtual void transpose(MatrixPtr& matTrans, bool memAlloc) {
+    LOG(FATAL) << "Not implemented";
+  }
+
+  /**
+   * @brief rotate the matrix 90 degrees, clock-wise or counter-clock-wise
+   *        depending on clockWise.
+   *
+   * allocate matRot's memory outside, then set memAlloc as false;
+   * else set as true.
+   */
+  virtual void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
     LOG(FATAL) << "Not implemented";
   }
 
@@ -387,7 +397,7 @@ public:
    * if allocate matInv's memory outside, then set memAlloc as false;
   * else set as true.
   */
-  virtual void inverse(MatrixPtr matInv, bool memAlloc) {
+  virtual void inverse(MatrixPtr& matInv, bool memAlloc) {
     LOG(FATAL) << "Not implemented";
   }
 
@@ -1169,11 +1179,15 @@ public:
   void accumulateColSum(Matrix& src);
   real getAbsSum();
 
+  real getMin();
+  real getMax();
+
   MatrixPtr getTranspose();
-  void transpose(MatrixPtr matTrans, bool memAlloc);
+  void transpose(MatrixPtr& matTrans, bool memAlloc);
+  void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise);
 
   MatrixPtr getInverse();
-  void inverse(MatrixPtr matInv, bool memAlloc);
+  void inverse(MatrixPtr& matInv, bool memAlloc);
 
   /// add b to each sample of this.
   void addBias(Matrix& b, real scale);
@@ -1485,10 +1499,11 @@ public:
   real getAbsSum();
 
   MatrixPtr getTranspose();
-  void transpose(MatrixPtr matTrans, bool memAlloc);
+  void transpose(MatrixPtr& matTrans, bool memAlloc);
+  void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise);
 
   MatrixPtr getInverse();
-  void inverse(MatrixPtr matInv, bool memAlloc);
+  void inverse(MatrixPtr& matInv, bool memAlloc);
 
   void copyFrom(const Matrix& src);
diff --git a/paddle/math/SparseMatrix.cpp b/paddle/math/SparseMatrix.cpp
index 3bae6d373f..6370c77386 100644
--- a/paddle/math/SparseMatrix.cpp
+++ b/paddle/math/SparseMatrix.cpp
@@ -497,7 +497,7 @@ void GpuSparseMatrix::setRow(size_t row,
 
 SparseValueType GpuSparseMatrix::getValueType() const { return valueType_; }
 
-void GpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
+void GpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
   CHECK_EQ(format_, SPARSE_CSC);
   int nnz = sMatrix_->nnz;
   if (memAlloc) {
diff --git a/paddle/math/SparseMatrix.h b/paddle/math/SparseMatrix.h
index 1d3801548e..f6cd5df338 100644
--- a/paddle/math/SparseMatrix.h
+++ b/paddle/math/SparseMatrix.h
@@ -109,7 +109,7 @@ public:
   MatrixPtr getTranspose();
 
   /// B = A'
-  void transpose(MatrixPtr matTrans, bool memAlloc);
+  void transpose(MatrixPtr& matTrans, bool memAlloc);
 
   void copyFrom(const Matrix& src);
   void copyFrom(const Matrix& src, hl_stream_t stream);
diff --git a/paddle/math/tests/test_SparseMatrix.cpp b/paddle/math/tests/test_SparseMatrix.cpp
index 9d3fbaef43..c0572dfdbf 100644
--- a/paddle/math/tests/test_SparseMatrix.cpp
+++ b/paddle/math/tests/test_SparseMatrix.cpp
@@ -248,11 +248,13 @@ TEST(Matrix, SparseMatrixTranspose) {
       /*dense matrix transpose*/
       CpuMatrixPtr matC(new CpuMatrix(height, width));
       matC->copyFrom(*matA);
-      CpuMatrixPtr matD(new CpuMatrix(width, height));
+      MatrixPtr matD(new CpuMatrix(width, height));
       matC->transpose(matD, false);
+
       /*check result*/
       checkSMatrixEqual2Dense(
-          std::dynamic_pointer_cast<CpuSparseMatrix>(matB), matD);
+          std::dynamic_pointer_cast<CpuSparseMatrix>(matB),
+          std::dynamic_pointer_cast<CpuMatrix>(matD));
     }
   }
 }
diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp
index f0c49791d7..c062ca1080 100644
--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -105,6 +105,21 @@ void testMatrixGetSum(int height, int width) {
   EXPECT_LE(fabs(cpuSum - gpuSum), err);
 }
 
+void testMatrixGetMinMax(int height, int width) {
+  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
+  cpuInput->randomizeUniform();
+  gpuInput->copyFrom(*cpuInput);
+
+  real cpuMin = cpuInput->getMin();
+  real gpuMin = gpuInput->getMin();
+  real cpuMax = cpuInput->getMax();
+  real gpuMax = gpuInput->getMax();
+
+  EXPECT_EQ(cpuMin, gpuMin);
+  EXPECT_EQ(cpuMax, gpuMax);
+}
+
 void testMatrixZeroAtOffset(int height, int width) {
   MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
   MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
@@ -181,7 +196,7 @@ void testMatrixInverse(int height) {
   cpu->add(*outputCheck);
 
   gpu->copyFrom(*cpu);
-  cpu->inverse(cpuI, false);
+  cpu->inverse(cpuI, true);
   gpu->inverse(gpuI, false);
 
   TensorCheckErr(*cpuI, *gpuI);
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 0456404832..be4634d510 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -427,14 +427,14 @@ message LayerConfig {
   // bias size
   optional uint32 bias_size = 48 [default = 0];
 
-  // this parameter can be used as a user-defined parameter when necessary, 
+  // this parameter can be used as a user-defined parameter when necessary,
   // without changing the proto file.
-  // e.g., when a new layer with a user-defined parameter is implemented, 
+  // e.g., when a new layer with a user-defined parameter is implemented,
   // it can be used to pass that parameter, without modifying the proto file.
   // string type is used for flexibility: different types can be converted
-  // to string and reinterpreted in the user's own layer implementation. 
+  // to string and reinterpreted in the user's own layer implementation.
   optional string user_arg = 49;
-  
+
   // to indicate rectangle image data
   optional uint64 height = 50;
   optional uint64 width = 51;
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 4fbf076ae9..97744ed32e 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -830,7 +830,6 @@ class Pool(Cfg):
                  channels,
                  size_x,
                  size_y=None,
-                 img_width=None,
                  start=None,
                  stride=None,  # 1 by defalut in protobuf
                  stride_y=None,
@@ -1834,6 +1833,7 @@ class PoolLayer(LayerBase):
             pool_conf.channels)
 
 
+
 @config_layer('spp')
 class SpatialPyramidPoolLayer(LayerBase):
     def __init__(self, name, inputs, **xargs):
@@ -1927,8 +1927,8 @@ class BatchNormLayer(LayerBase):
             image_conf = self.config.inputs[0].image_conf
             parse_image(self.inputs[0].image, input_layer.name, image_conf)
 
-            # Only pass the width and height of input to batch_norm layer 
-            # when either of it is non-zero. 
+            # Only pass the width and height of input to batch_norm layer
+            # when either of it is non-zero.
             if input_layer.width != 0 or input_layer.height != 0:
                 self.set_cnn_layer(name, image_conf.img_size_y,
                                    image_conf.img_size, image_conf.channels, False)
@@ -1968,6 +1968,18 @@ class ResizeLayer(LayerBase):
             'ResizeLayer must have one and only one input')
 
 
+@config_layer('rotate')
+class RotateLayer(LayerBase):
+    def __init__(self, name, inputs, height, device=None):
+        super(RotateLayer, self).__init__(
+            name, 'rotate', 0, inputs=inputs, device=device)
+        config_assert(
+            len(self.inputs) == 1,
+            'RotateLayer must have one and only one input')
+        self.config.height = height
+        self.set_layer_size(self.get_input_layer(0).size)
+
+
 @config_layer('blockexpand')
 class BlockExpandLayer(LayerBase):
     def __init__(self, name, inputs, **xargs):
diff --git a/python/paddle/trainer/recurrent_units.py b/python/paddle/trainer/recurrent_units.py
old mode 100644
new mode 100755
index ff7e8932dc..ef92107a10
--- a/python/paddle/trainer/recurrent_units.py
+++ b/python/paddle/trainer/recurrent_units.py
@@ -15,10 +15,10 @@
 # recurrent_units.py
 # Version 2.0
 #
-# Some recurrent units can be used in recurrent layer group, 
+# Some recurrent units can be used in recurrent layer group,
 # to use these units, import this module in your config_file:
-#   import trainer.recurrent_units 
-# 
+#   import trainer.recurrent_units
+#
 # The modules in this file are DEPRECATED.
 # If you would like to use lstm/gru
 # please use the functions defined in paddle.trainer_config_helpers.
@@ -29,7 +29,7 @@ from paddle.trainer.config_parser import *
 # long short term memory, can be used in recurrent machine
 # *inputs* must be a list of Projections, for example:
 #   inputs = [FullMatrixProjection("input_layer_name")],
-# *para_prefix* defines parameter names, if the *para_prefix* of 
+# *para_prefix* defines parameter names, if the *para_prefix* of
 # two LstmRecurrentUnit is same, they share same parameters
 # *out_memory* can be defined outside if it's used outside
 def LstmRecurrentUnit(name,
@@ -197,7 +197,7 @@ def LstmRecurrentLayerGroup(name,
 
 # gated recurrent unit, can be used in recurrent machine
 # *inputs* should be a list of Projections, for example:
 #   inputs = [FullMatrixProjection("input_layer_name")],
-# *para_prefix* defines parameter names, if the *para_prefix* of 
+# *para_prefix* defines parameter names, if the *para_prefix* of
 # two GatedRecurrentUnit is same, they share same parameters
 # *out_memory* can be defined outside if it's used outside
 
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
old mode 100644
new mode 100755
index f0b5d7c3b4..c502885cf9
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -70,6 +70,8 @@ __all__ = [
     'interpolation_layer',
     'bilinear_interp_layer',
     'trans_layer',
+    'rotate_layer',
+    'flip_layer',
     'sum_to_one_norm_layer',
     'get_output_layer',
     'LayerType',
@@ -154,6 +156,7 @@ class LayerType(object):
     POWER_LAYER = 'power'
     SCALING_LAYER = 'scaling'
     TRANS_LAYER = 'trans'
+    ROTATE_LAYER = 'rotate'
     OUT_PROD_LAYER = 'out_prod'
     FEATURE_MAP_EXPAND_LAYER = 'featmap_expand'
 
@@ -1642,7 +1645,7 @@ def scaling_layer(input, weight, name=None, layer_attr=None):
 @layer_support()
 def trans_layer(input, name=None, layer_attr=None):
     """
-    A layer for transposition.
+    A layer for transposing a minibatch matrix.
 
     .. math::
        y = x^\mathrm{T}
@@ -1673,6 +1676,87 @@ def trans_layer(input, name=None, layer_attr=None):
         name, LayerType.TRANS_LAYER, parents=[input], size=input.size)
 
 
+@wrap_name_default()
+@layer_support()
+def rotate_layer(input, height, name=None, layer_attr=None):
+    """
+    A layer for rotating a matrix by 90 degrees (clock-wise), usually used
+    when the input sample is an image or a feature map.
+
+    .. math::
+       y(j,i) = x(M-i-1,j)
+
+    where :math:`x` is (M x N) input, and :math:`y` is (N x M) output.
+
+    The example usage is:
+
+    .. code-block:: python
+
+       rot = rotate_layer(input=layer,
+                          height=100)
+
+    :param input: Input layer.
+    :type input: LayerOutput
+    :param height: The height of the sample matrix.
+    :type height: int
+    :param name: Layer name.
+    :type name: basestring
+    :param layer_attr: extra layer attributes.
+    :type layer_attr: ExtraLayerAttribute.
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    assert isinstance(input, LayerOutput)
+    l = Layer(name=name,
+              height=height,
+              type=LayerType.ROTATE_LAYER,
+              inputs=[input.name],
+              **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(name=name,
+                       layer_type=LayerType.ROTATE_LAYER,
+                       parents=[input],
+                       size=l.config.size)
+
+
+@wrap_name_default()
+@layer_support()
+def flip_layer(input, height, name=None, layer_attr=None):
+    """
+    A layer for flipping a matrix about its center, which is equivalent to
+    rotating the matrix twice. Usually used when the input sample is an
+    image or a feature map.
+
+    .. math::
+       y(i,j) = x(M-i-1, N-j-1)
+
+    where :math:`x` is (M x N) input, and :math:`y` is (M x N) output.
+
+    The example usage is:
+
+    .. code-block:: python
+
+       flip = flip_layer(input=layer,
+                         height=100)
+
+    :param input: Input layer.
+    :type input: LayerOutput
+    :param height: The height of the sample matrix.
+    :type height: int
+    :param name: Layer name.
+    :type name: basestring
+    :param layer_attr: extra layer attributes.
+    :type layer_attr: ExtraLayerAttribute.
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    assert isinstance(input, LayerOutput)
+    return rotate_layer(input=rotate_layer(input=input,
+                                           height=height),
+                        height=height,
+                        name=name,
+                        layer_attr=layer_attr)
+
+
 @wrap_name_default()
 @layer_support()
 def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
@@ -1826,14 +1910,14 @@ def img_conv_layer(input,
                    trans=False,
                    layer_type=None):
     """
-    Convolution layer for image. Paddle can support both square and non-square 
+    Convolution layer for image. Paddle can support both square and non-square
     input currently.
 
     The details of convolution layer, please refer UFLDL's `convolution
     `_ .
 
-    Convolution Transpose (deconv) layer for image. Paddle can support both square 
+    Convolution Transpose (deconv) layer for image. Paddle can support both square
     and non-square input currently.
 
     The details of convolution transpose layer,
@@ -1892,7 +1976,7 @@ def img_conv_layer(input,
     :param trans: true if it is a convTransLayer, false if it is a convLayer
     :type trans: bool
     :param layer_type: specify the layer_type, default is None. If trans=True,
-                       layer_type has to be "exconvt", otherwise layer_type 
+                       layer_type has to be "exconvt", otherwise layer_type
                        has to be either "exconv" or "cudnn_conv"
     :type layer_type: String
     :return: LayerOutput object.
@@ -3619,9 +3703,9 @@ def pad_layer(input,
     input data and 3 zeros after the input data in channel dimension.
     pad_h means padding zeros in height dimension. pad_w means padding zeros
     in width dimension.
-    
+
     For example,
-    
+
     .. code-block::
       input(2,2,2,3) = [
                          [ [[1,2,3], [3,4,5]], [[2,3,5], [1,6,7]] ],
                          [ [[4,3,1], [1,8,7]], [[3,8,9], [2,3,5]] ]
                        ]
-    
+
       pad_c=[1,1], pad_h=[0,0], pad_w=[0,0]
       output(2,4,2,3) = [
                           [ [[0,0,0], [0,0,0]],
@@ -4739,6 +4823,7 @@ def cross_entropy_with_selfnorm(input,
                                 layer_attr=None):
     """
     A loss layer for multi class entropy with selfnorm.
+    Input should be a vector of positive numbers, without normalization.
 
     .. code-block:: python
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
old mode 100644
new mode 100755
index d0cbe3891f..cadde11ff8
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -957,22 +957,22 @@ def simple_gru(input,
     use one complete layer to implement rnn (including simple rnn, gru and lstm)
     with multiple time steps, such as recurrent_layer, lstmemory, grumemory. But,
     the multiplication operation :math:`W x_t` is not computed in these layers.
-    See details in their interfaces in layers.py. 
+    See details in their interfaces in layers.py.
     The other implementation is to use an recurrent group which can ensemble a
     series of layers to compute rnn step by step. This way is flexible for
     attenion mechanism or other complex connections.
 
     - gru_step_layer: only compute rnn by one step. It needs an memory as input
       and can be used in recurrent group.
-    - gru_unit: a wrapper of gru_step_layer with memory. 
+    - gru_unit: a wrapper of gru_step_layer with memory.
     - gru_group: a GRU cell implemented by a combination of multiple layers in
       recurrent group.
-      But :math:`W x_t` is not done in group. 
+      But :math:`W x_t` is not done in group.
     - gru_memory: a GRU cell implemented by one layer, which does same calculation
-      with gru_group and is faster than gru_group. 
-    - simple_gru: a complete GRU implementation inlcuding :math:`W x_t` and 
+      with gru_group and is faster than gru_group.
+    - simple_gru: a complete GRU implementation including :math:`W x_t` and
       gru_group. :math:`W` contains :math:`W_r`, :math:`W_z` and :math:`W`, see
-      formula in grumemory. 
+      formula in grumemory.
 
     The computational speed is that, grumemory is relatively better than
     gru_group, and gru_group is relatively better than simple_gru.
diff --git a/python/paddle/trainer_config_helpers/tests/layers_test_config.py b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
index ae275735aa..10b7358d92 100644
--- a/python/paddle/trainer_config_helpers/tests/layers_test_config.py
+++ b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
@@ -39,6 +39,10 @@ z1 = mixed_layer(
 assert z1.size > 0
 
 y2 = fc_layer(input=y, size=15)
+z2 = rotate_layer(input=y2,
+                  height=5)
+z3 = flip_layer(input=y2,
+                height=3)
 
 cos1 = cos_sim(a=x1, b=y1)
 cos3 = cos_sim(a=x1, b=y2, size=3)
@@ -46,7 +50,7 @@ cos3 = cos_sim(a=x1, b=y2, size=3)
 linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3)
 
 out = fc_layer(
-    input=[cos1, cos3, linear_comb, z, z1],
+    input=[cos1, cos3, linear_comb, z, z1, z2, z3],
     size=num_classes,
     act=SoftmaxActivation())
 
-- 
GitLab
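
For reference, a minimal usage sketch of the two layers this patch adds, written against the trainer_config_helpers API shown above; the 'image' data layer name and the 32x32 sample shape are illustrative assumptions, not part of the patch.

    from paddle.trainer_config_helpers import *

    # a 32 x 32 single-channel sample, flattened to a 1024-wide vector
    img = data_layer(name='image', size=32 * 32)

    # rotate the sample 90 degrees clock-wise; height is the number of rows
    rot = rotate_layer(input=img, height=32)

    # flip the sample about its center (implemented as two successive rotations)
    flipped = flip_layer(input=img, height=32)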