diff --git a/.gitignore b/.gitignore index 65ba217de37c82287829eef105066aba86d69651..ee8489c1d71bd050b9a1d9358a664d2294165292 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,6 @@ build/ .vscode .idea .project +.cproject .pydevproject +Makefile diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index 42ff0b70d86f788d58d56854a778d61e2af53e06..6bc3b3b801796a227a7b767c8da048a3ccf88827 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -14,12 +14,15 @@ limitations under the License. */ #include "paddle/utils/Logging.h" #include "ConvBaseLayer.h" +#include "paddle/math/MathUtils.h" namespace paddle { bool ConvBaseLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); + isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv") + ? false : true; /* Initialize the convolutional layer parameter */ numFilters_ = config_.num_filters(); @@ -42,8 +45,20 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, outputW_.push_back(conf.output_x()); } + CHECK(inputLayers_.size() == parameters_.size()); + for (size_t i = 0; i < inputLayers_.size(); i++) { + size_t height, width; + height = filterPixels_[i] * filterChannels_[i]; + width = (!isDeconv_) ? numFilters_ : channels_[i]; + + // create a new weight + CHECK_EQ(parameters_[i]->getSize(), width * height); + Weight* w = new Weight(height, width, parameters_[i]); + weights_.emplace_back(w); + } + /* initialize the biases_ */ - if (biasParameter_.get() != NULL) { + if (biasParameter_.get()) { if (sharedBiases_) { CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); biases_ = @@ -70,23 +85,48 @@ size_t ConvBaseLayer::calOutputSize() { clearAndReserve(&outputH_); clearAndReserve(&outputW_); size_t layerSize = 0; - for (size_t i = 0; i < inputLayers_.size(); i++) { - imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - if (imgSizeH_[i] == 0) - imgSizeH_[i] = config_.inputs(i).conv_conf().img_size(); - if (imgSizeW_[i] == 0) - imgSizeW_[i] = config_.inputs(i).conv_conf().img_size(); - outputH_.push_back(outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], - strideY_[i], caffeMode_)); - outputW_.push_back(outputSize(imgSizeW_[i], filterSize_[i], padding_[i], - stride_[i], caffeMode_)); - CHECK_EQ(outputH_[i], outputH_[0]); - CHECK_EQ(outputW_[i], outputW_[0]); + + auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) { + for (size_t i = 0; i < inputLayers_.size(); i++) { + inH.push_back(inputLayers_[i]->getOutput().getFrameHeight()); + inW.push_back(inputLayers_[i]->getOutput().getFrameWidth()); + if (isDeconv_) { + if (inH[i] == 0) + inH[i] = config_.inputs(i).conv_conf().output_x(); + if (inW[i] == 0) + inW[i] = config_.inputs(i).conv_conf().output_x(); + outH.push_back( + imageSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], + caffeMode_)); + outW.push_back( + imageSize(inW[i], filterSize_[i], padding_[i], stride_[i], + caffeMode_)); + } else { + if (inH[i] == 0) + inH[i] = config_.inputs(i).conv_conf().img_size(); + if (inW[i] == 0) + inW[i] = config_.inputs(i).conv_conf().img_size(); + outH.push_back( + outputSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], + caffeMode_)); + outW.push_back( + outputSize(inW[i], filterSize_[i], padding_[i], stride_[i], + caffeMode_)); + } + CHECK_EQ(outH[i], outH[0]); + CHECK_EQ(outW[i], 
outW[0]); + } + getOutput().setFrameHeight(outH[0]); + getOutput().setFrameWidth(outW[0]); + layerSize = outH[0] * outW[0] * size_t(numFilters_); + }; + + if (isDeconv_) { + setLayerSize(outputH_, outputW_, imgSizeH_, imgSizeW_); + } else { + setLayerSize(imgSizeH_, imgSizeW_, outputH_, outputW_); } - getOutput().setFrameHeight(outputH_[0]); - getOutput().setFrameWidth(outputW_[0]); - layerSize = outputH_[0] * outputW_[0] * size_t(numFilters_); + return layerSize; } diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index e660a6d6f50acf8286dfd6fc795e8a03ce3ba604..b80cab899585e7bd93bfc86d8afa116d343d36d7 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -28,6 +28,9 @@ class ConvBaseLayer : public Layer { protected: typedef std::vector<int> IntV; + /// True if it is a deconv layer, false if it is a convolution layer + bool isDeconv_; + /// The number of filters. int numFilters_; /// The x dimension of the padding. diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.cpp b/paddle/gserver/layers/ExpandConvBaseLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0bab0ca764f4fea7dc37f0eae096de1a79c9df21 --- /dev/null +++ b/paddle/gserver/layers/ExpandConvBaseLayer.cpp @@ -0,0 +1,263 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#include "ExpandConvBaseLayer.h" + +#include "paddle/utils/Logging.h" +namespace paddle { + +bool ExpandConvBaseLayer::init(const LayerMap &layerMap, + const ParameterMap &parameterMap) { + /* Initialize the basic convolutional parent class */ + ConvBaseLayer::init(layerMap, parameterMap); + + /* The class fields channels_ and numFilters_ are the same as in the config, + * i.e., channels_ is for the input and numFilters_ is for the output + * + * But in order for the variables in convTrans to have the same semantic + * meaning as in conv, we need to swap channels_ and numFilters_ here for + * convTrans, and in other functions too. + * */ + int channel; + int numFilters; + /* Initialize the projection */ + for (auto &inputConfig : config_.inputs()) { + const ConvConfig &conf = inputConfig.conv_conf(); + numFilters = isDeconv_ ? conf.channels() : numFilters_; + subM_.push_back(numFilters / conf.groups()); + subN_.push_back(conf.output_x() * conf.output_x()); + channel = isDeconv_ ?
numFilters_ : conf.channels(); + subK_.push_back(channel * conf.filter_size() * conf.filter_size() / + conf.groups()); + /* Consistent caffe mode for multiple input */ + caffeMode_ = conf.caffe_mode(); + } + + getOutputSize(); + + return true; +} + +size_t ExpandConvBaseLayer::getOutputSize() { + CHECK_NE(inputLayers_.size(), 0UL); + size_t layerSize = ConvBaseLayer::calOutputSize(); + subN_.clear(); + for (size_t i = 0; i < inputLayers_.size(); i++) { + subN_.push_back(outputH_[i] * outputW_[i]); + } + return layerSize; +} + +void ExpandConvBaseLayer::resetExpandInput(size_t height, size_t width) { + Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_); +} + +void ExpandConvBaseLayer::addSharedBias() { + size_t mapW = getOutputSize() / numFilters_; + size_t mapH = getOutputValue()->getElementCnt() / mapW; + MatrixPtr out = + Matrix::create(getOutputValue()->getData(), mapH, mapW, false, useGpu_); + + Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); + + out->transpose(transOutValue_, false); // false means no memory allocation + transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_, + numFilters_); + + MatrixPtr bias = + Matrix::create(biases_->getW()->getData(), 1, + biases_->getW()->getElementCnt(), false, useGpu_); + transOutValue_->addBias(*bias, 1.0f); + + transOutValue_->reshape(mapW, mapH); + transOutValue_->transpose(out, false); // false means no memory allocation + + out->clear(); + bias->clear(); +} + +void ExpandConvBaseLayer::addUnsharedBias() { + MatrixPtr outValue = getOutputValue(); + MatrixPtr bias = + Matrix::create(biases_->getW()->getData(), 1, + biases_->getW()->getElementCnt(), false, useGpu_); + outValue->addBias(*bias, 1.0f); +} + + +void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx, + int inIdx) { + int channel = isDeconv_ ? numFilters_ : channels_[inIdx]; + + resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]); + real *imgData = image->getData() + startIdx * image->getWidth(); + MatrixPtr imageTmp = Matrix::create( + imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel, false, + useGpu_); + expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx], + channel, filterSize_[inIdx], + filterSize_[inIdx], stride_[inIdx], stride_[inIdx], + padding_[inIdx], padding_[inIdx], + outputH_[inIdx], outputW_[inIdx]); + imageTmp->clear(); +} + +void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out, + int inIdx, int startIdx) { + int subM = subM_[inIdx]; + int subN = subN_[inIdx]; + int subK = subK_[inIdx]; + + expandOneFrame(image, startIdx, inIdx); + + int numFilters = isDeconv_ ? channels_[inIdx] : numFilters_; + + real *outData = + out->getData() + startIdx * subN * numFilters; + + real *wgtData = weights_[inIdx]->getW()->getData(); + real *expInData = expandInput_->getData(); + for (int g = 0; g < groups_[inIdx]; ++g) { + MatrixPtr A = + Matrix::create(wgtData, subK, subM, true, useGpu_); // mark transpose + MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_); + MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_); + C->mul(A, B, 1, 1); + + A->clear(); + B->clear(); + C->clear(); + wgtData += subK * subM; + expInData += subK * subN; + outData += subM * subN; + } +} + +void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image, + int inpIdx) { + int channel = isDeconv_ ? 
numFilters_ : channels_[inpIdx]; + + int subM = subM_[inpIdx]; + int subN = subN_[inpIdx]; + int subK = subK_[inpIdx]; + size_t batchSize = image->getHeight(); + + /* reset the expand-grad memory */ + resetExpandInput(subK * groups_[inpIdx], subN); + + real *localGradData = out->getData(); + real *tgtGradData = image->getData(); + for (size_t n = 0; n < batchSize; n++) { + real *wgtData = weights_[inpIdx]->getW()->getData(); + real *expandInData = expandInput_->getData(); + + for (int g = 0; g < groups_[inpIdx]; g++) { + // create temporary matrix + MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_); + MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_); + MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_); + C->mul(A, B); // mul + + // clear the temporary matrix + A->clear(); + B->clear(); + C->clear(); + + expandInData += subK * subN; + localGradData += subM * subN; + wgtData += subK * subM; + } + + // shrink one frame outGrad + MatrixPtr oneGradTmp = Matrix::create( + expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_); + MatrixPtr vTmp = Matrix::create( + tgtGradData, 1, + imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel, false, + useGpu_); + vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx], + channel, filterSize_[inpIdx], + filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx], + padding_[inpIdx], padding_[inpIdx], + outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f); + vTmp->clear(); + oneGradTmp->clear(); + + // move the data-pointer + tgtGradData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel; + } +} + +void ExpandConvBaseLayer::bpropWeights(MatrixPtr image, MatrixPtr out, + int inpIdx) { + MatrixPtr weightGrad = weights_[inpIdx]->getWGrad(); + + int subM = subM_[inpIdx]; + int subN = subN_[inpIdx]; + int subK = subK_[inpIdx]; + size_t batchSize = image->getHeight(); + resetExpandInput(subK * groups_[inpIdx], subN); + + real *gradData = out->getData(); + + for (size_t n = 0; n < batchSize; n++) { // frame by frame + // expand + expandOneFrame(image, n, inpIdx); + real *wGradData = weightGrad->getData(); + real *expandInData = expandInput_->getData(); + + // expand-mul one-group by one + for (int g = 0; g < groups_[inpIdx]; g++) { + MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_); + MatrixPtr B = Matrix::create(gradData, subM, subN, true, useGpu_); + MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_); + C->mul(A, B, 1, 1); + + A->clear(); + B->clear(); + C->clear(); + gradData += subM * subN; + wGradData += subK * subM; + expandInData += subK * subN; + } + } +} + +void ExpandConvBaseLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) { + size_t mapW = getOutputSize() / numFilters_; + size_t mapH = v->getElementCnt() / mapW; + MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_); + + Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); + + vTmp->transpose(transOutValue_, false); // false means no memory allocation + transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_, + numFilters_); + biases->collectBias(*transOutValue_, 1.0f); +} + +void ExpandConvBaseLayer::bpropBiases(MatrixPtr v) { + MatrixPtr biases = + Matrix::create(biases_->getWGrad()->getData(), 1, + biases_->getWGrad()->getElementCnt(), false, useGpu_); + if (sharedBiases_) { + bpropSharedBias(biases, v); + } else { + biases->collectBias(*v, 1.0f); + } + biases->clear(); +} + +} // namespace paddle diff --git 
a/paddle/gserver/layers/ExpandConvBaseLayer.h b/paddle/gserver/layers/ExpandConvBaseLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..9858fa348c3fc85fdea0c017ca44fa047a6eaf42 --- /dev/null +++ b/paddle/gserver/layers/ExpandConvBaseLayer.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#pragma once + +#include "ConvBaseLayer.h" +#include "paddle/math/Matrix.h" +#include <vector> + +namespace paddle { + +/** + * @brief A subclass of ConvBaseLayer that is a superclass of both + * ExpandConvLayer and ExpandConvTransLayer + */ +class ExpandConvBaseLayer : public ConvBaseLayer { +protected: + /// For expand convolution. + /// subM_ = numFilters_ / groups_. + IntV subM_; + /// subN_ = outputH_ * outputW_. + IntV subN_; + /// subK_ = channels_ * filterPixels_ / groups_. + IntV subK_; + + /* The expandInput_ and transOutValue_ are used for the CPU expand-conv + * calculation. Expand one sample at a time. Shape: + * (numChannels * filterPixels_, outputSizeH * outputSizeW) + * */ + MatrixPtr expandInput_; + /// The transpose of output, which is an auxiliary matrix. + MatrixPtr transOutValue_; + +public: + explicit ExpandConvBaseLayer(const LayerConfig& config) + : ConvBaseLayer(config) {} + + ~ExpandConvBaseLayer() {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + size_t getOutputSize(); + /** + * Create or resize expandInput_. + */ + void resetExpandInput(size_t height, size_t width); + + /** + * Add shared bias. + */ + void addSharedBias(); + + /** + * Add unshared bias. + */ + void addUnsharedBias(); + /** + * Expand one input sample. + */ + void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx); + + /** + * Expand one input sample and perform matrix multiplication.
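+ * For each group g this computes C += A^T * B, where A is the (subK x subM)
+ * weight slice (created with the transpose flag set), B is the (subK x subN)
+ * im2col-expanded input held in expandInput_, and C is the (subM x subN)
+ * slice of the output value.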
+ */ + void expandFwdOnce(MatrixPtr image, MatrixPtr out, int inIdx, int startIdx); + + void bpropSharedBias(MatrixPtr biases, MatrixPtr v); + void bpropBiases(MatrixPtr v); + void bpropWeights(MatrixPtr image, MatrixPtr out, int inpIdx); + void bpropActs(MatrixPtr image, MatrixPtr out, int inpIdx); +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 80a6a62b5c0de768f9cc534adf68405a883ec10f..5ea1fdece5f7b83c7e1d576e7f02a4a2545f0cd8 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -24,150 +24,29 @@ REGISTER_LAYER(exconv, ExpandConvLayer); bool ExpandConvLayer::init(const LayerMap &layerMap, const ParameterMap ¶meterMap) { /* Initialize the basic convolutional parent class */ - ConvBaseLayer::init(layerMap, parameterMap); - - /* Initialize the projection */ - for (auto &inputConfig : config_.inputs()) { - const ConvConfig &conf = inputConfig.conv_conf(); - subM_.push_back(numFilters_ / conf.groups()); - subN_.push_back(conf.output_x() * conf.output_x()); - subK_.push_back(conf.channels() * conf.filter_size() * conf.filter_size() / - conf.groups()); - /* Consistent caffe mode for multiple input */ - caffeMode_ = conf.caffe_mode(); - } - - /* initialize the weightList */ - CHECK(inputLayers_.size() == parameters_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - size_t height, width; - height = filterPixels_[i] * filterChannels_[i]; - width = numFilters_; - - // create a new weight - CHECK_EQ(parameters_[i]->getSize(), width * height); - Weight* w = new Weight(height, width, parameters_[i]); - weights_.emplace_back(w); - } - + ExpandConvBaseLayer::init(layerMap, parameterMap); return true; } -size_t ExpandConvLayer::getOutputSize() { - CHECK_NE(inputLayers_.size(), 0UL); - size_t layerSize = ConvBaseLayer::calOutputSize(); - subN_.clear(); - for (size_t i = 0; i < inputLayers_.size(); i++) { - subN_.push_back(outputH_[i] * outputW_[i]); - } - return layerSize; -} - -void ExpandConvLayer::resetExpandInput(size_t height, size_t width) { - Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_); -} - -void ExpandConvLayer::resetConvOutput(size_t batchSize, int inIdx) { - Matrix::resizeOrCreate(transOutValue_, batchSize * numFilters_, subN_[inIdx], - false, useGpu_); -} - -void ExpandConvLayer::expandOneFrame(MatrixPtr image, size_t startIdx, - int inIdx) { - resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]); - real *imgData = image->getData() + startIdx * image->getWidth(); - MatrixPtr imageTmp = Matrix::create( - imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channels_[inIdx], false, - useGpu_); - expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx], - channels_[inIdx], filterSize_[inIdx], - filterSize_[inIdx], stride_[inIdx], stride_[inIdx], - padding_[inIdx], padding_[inIdx], - outputH_[inIdx], outputW_[inIdx]); - imageTmp->clear(); -} - -void ExpandConvLayer::expandFwdOnce(MatrixPtr image, int inIdx, int startIdx) { - int subM = subM_[inIdx]; - int subN = subN_[inIdx]; - int subK = subK_[inIdx]; - - expandOneFrame(image, startIdx, inIdx); - - real *outData = - getOutputValue()->getData() + startIdx * subN * numFilters_; - - real *wgtData = weights_[inIdx]->getW()->getData(); - real *expInData = expandInput_->getData(); - for (int g = 0; g < groups_[inIdx]; ++g) { - MatrixPtr A = - Matrix::create(wgtData, subK, subM, true, useGpu_); // mark transpose - MatrixPtr B = Matrix::create(expInData, subK, 
subN, false, useGpu_); - MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_); - C->mul(A, B, 1, 1); - - A->clear(); - B->clear(); - C->clear(); - wgtData += subK * subM; - expInData += subK * subN; - outData += subM * subN; - } -} - -void ExpandConvLayer::addSharedBias() { - size_t mapW = getOutputValue()->getWidth() / numFilters_; - size_t mapH = getOutputValue()->getElementCnt() / mapW; - MatrixPtr out = - Matrix::create(getOutputValue()->getData(), mapH, mapW, false, useGpu_); - - Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); - - out->transpose(transOutValue_, false); // false means no memory allocation - transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_, - numFilters_); - - MatrixPtr bias = - Matrix::create(biases_->getW()->getData(), 1, - biases_->getW()->getElementCnt(), false, useGpu_); - transOutValue_->addBias(*bias, 1.0f); - - transOutValue_->reshape(mapW, mapH); - transOutValue_->transpose(out, false); // false means no memory allocation - - out->clear(); - bias->clear(); -} - -void ExpandConvLayer::addUnsharedBias() { - MatrixPtr outValue = getOutputValue(); - MatrixPtr bias = - Matrix::create(biases_->getW()->getData(), 1, - biases_->getW()->getElementCnt(), false, useGpu_); - outValue->addBias(*bias, 1.0f); -} - void ExpandConvLayer::forward(PassType passType) { Layer::forward(passType); /* malloc memory for the output_ if necessary */ - /* note: one sample correspond to one colum, and the - * transOutValue correspond sample to one row */ - int batchSize = inputLayers_[0]->getOutputValue()->getWidth(); - batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); resetOutput(batchSize, getOutputSize()); MatrixPtr image = nullptr; - for (size_t i = 0; i != inputLayers_.size(); ++i) { + MatrixPtr outV = getOutputValue(); + for (size_t i = 0; i < inputLayers_.size(); ++i) { LayerPtr prevLayer = getPrev(i); image = prevLayer->getOutputValue(); for (size_t off = 0; off < image->getHeight(); off++) { REGISTER_TIMER_INFO("expandFwdOnce", getName().c_str()); - expandFwdOnce(image, i, off); + expandFwdOnce(image, outV, i, off); } } /* add the bias-vector */ - if (biases_.get() != NULL) { + if (biases_.get()) { if (sharedBiases_) { addSharedBias(); } else { @@ -179,29 +58,6 @@ void ExpandConvLayer::forward(PassType passType) { forwardActivation(); } -void ExpandConvLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) { - size_t mapW = v->getWidth() / numFilters_; - size_t mapH = v->getElementCnt() / mapW; - MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_); - - Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); - - vTmp->transpose(transOutValue_, false); // false means no memory allocation - vTmp->reshape(transOutValue_->getElementCnt() / numFilters_, numFilters_); - biases->collectBias(*vTmp, 1.0f); -} - -void ExpandConvLayer::bpropBiases(MatrixPtr v) { - MatrixPtr biases = - Matrix::create(biases_->getWGrad()->getData(), 1, - biases_->getWGrad()->getElementCnt(), false, useGpu_); - if (sharedBiases_) { - bpropSharedBias(biases, v); - } else { - biases->collectBias(*v, 1.0f); - } - biases->clear(); -} void ExpandConvLayer::backward(const UpdateCallback &callback) { backwardActivation(); @@ -213,111 +69,18 @@ void ExpandConvLayer::backward(const UpdateCallback &callback) { biases_->getParameterPtr()->incUpdate(callback); } - for (size_t i = 0; i != inputLayers_.size(); ++i) { + for (size_t i = 0; i < inputLayers_.size(); 
++i) { /* First, calculate the input layers error */ - bpropActs(outGrad, i); + if (getPrev(i)->getOutputGrad()) { + bpropActs(outGrad, getPrev(i)->getOutputGrad(), i); + } if (weights_[i]->getWGrad()) { /* Then, calculate the W-gradient for the current layer */ - bpropWeights(outGrad, i); + bpropWeights(getPrev(i)->getOutputValue(), outGrad, i); /* Increasing the number of gradient */ weights_[i]->getParameterPtr()->incUpdate(callback); } } } -void ExpandConvLayer::bpropWeights(MatrixPtr v, int inpIdx) { - MatrixPtr weightGrad = weights_[inpIdx]->getWGrad(); - MatrixPtr inputV = getPrev(inpIdx)->getOutputValue(); - - int subM = subM_[inpIdx]; - int subN = subN_[inpIdx]; - int subK = subK_[inpIdx]; - size_t batchSize = inputV->getHeight(); - resetExpandInput(subK * groups_[inpIdx], subN); - resetConvOutput(batchSize, inpIdx); - - real *gradData = v->getData(); - - for (size_t n = 0; n < batchSize; n++) { // frame by frame - // expand - expandOneFrame(inputV, n, inpIdx); - real *wGradData = weightGrad->getData(); - real *expandInData = expandInput_->getData(); - - // expand-mul one-group by one - for (int g = 0; g < groups_[inpIdx]; g++) { - MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_); - MatrixPtr B = Matrix::create(gradData, subM, subN, true, useGpu_); - MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_); - C->mul(A, B, 1, 1); - - A->clear(); - B->clear(); - C->clear(); - gradData += subM * subN; - wGradData += subK * subM; - expandInData += subK * subN; - } - } -} - -void ExpandConvLayer::bpropActs(MatrixPtr v, int inpIdx) { - LayerPtr prevLayer = getPrev(inpIdx); - if (NULL == prevLayer->getOutputGrad()) { - return; - } - - int subM = subM_[inpIdx]; - int subN = subN_[inpIdx]; - int subK = subK_[inpIdx]; - size_t batchSize = v->getHeight(); - MatrixPtr tgtGrad = prevLayer->getOutputGrad(); - - /* reset the expand-grad memory */ - resetExpandInput(subK * groups_[inpIdx], subN); - resetConvOutput(batchSize, inpIdx); - - real *localGradData = v->getData(); - real *tgtGradData = tgtGrad->getData(); - for (size_t n = 0; n < batchSize; n++) { - real *wgtData = weights_[inpIdx]->getW()->getData(); - real *expandInData = expandInput_->getData(); - - for (int g = 0; g < groups_[inpIdx]; g++) { - // create temporary matrix - MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_); - MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_); - MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_); - C->mul(A, B); // mul - - // clear the temporary matrix - A->clear(); - B->clear(); - C->clear(); - - expandInData += subK * subN; - localGradData += subM * subN; - wgtData += subK * subM; - } - - // shrink one frame outGrad - MatrixPtr oneGradTmp = Matrix::create( - expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_); - MatrixPtr vTmp = Matrix::create( - tgtGradData, 1, - imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channels_[inpIdx], false, - useGpu_); - vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx], - channels_[inpIdx], filterSize_[inpIdx], - filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx], - padding_[inpIdx], padding_[inpIdx], - outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f); - vTmp->clear(); - oneGradTmp->clear(); - - // move the data-pointer - tgtGradData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channels_[inpIdx]; - } -} - } // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvLayer.h b/paddle/gserver/layers/ExpandConvLayer.h index 
030a3ba397ff41208bda84d0d6b876359d587c57..c07188a406183416cd57e2d027ba1205f6b65176 100644 --- a/paddle/gserver/layers/ExpandConvLayer.h +++ b/paddle/gserver/layers/ExpandConvLayer.h @@ -15,9 +15,9 @@ limitations under the License. */ #pragma once -#include "ConvBaseLayer.h" #include "paddle/math/Matrix.h" #include <vector> +#include "ExpandConvBaseLayer.h" namespace paddle { @@ -28,65 +28,18 @@ namespace paddle { * * The config file api is img_conv_layer. */ -class ExpandConvLayer : public ConvBaseLayer { -protected: - /// For expand convolution. - /// subM_ = numFilters_ / groups_. - IntV subM_; - /// subN_ = outputH_ * outputW_. - IntV subN_; - /// subK_ = channels_ * filterPixels_ * groups_. - IntV subK_; - /// Expand one sample at a time. shape: - /// (numChannels * filterPixels_, outputSizeH * outputSizeW) - MatrixPtr expandInput_; - /// The transpose of output, which is an auxiliary matrix. - MatrixPtr transOutValue_; +class ExpandConvLayer : public ExpandConvBaseLayer { public: - explicit ExpandConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {} + explicit ExpandConvLayer(const LayerConfig& config) : + ExpandConvBaseLayer(config) {} ~ExpandConvLayer() {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - size_t getOutputSize(); - - /** - * Create or resize expandInput_. - */ - void resetExpandInput(size_t height, size_t width); - - /** - * Create or resize transOutValue_. - */ - void resetConvOutput(size_t batchSize, int inIdx); - - /** - * Expand one input sample. - */ - void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx); - - /** - * Expand one input sample and perform matrix multiplication. - */ - void expandFwdOnce(MatrixPtr image, int inIdx, int startIdx); - - /** - * Add shared bias. - */ - void addSharedBias(); - - /** - * Add unshared bias. - */ - void addUnsharedBias(); void forward(PassType passType); - void bpropSharedBias(MatrixPtr biases, MatrixPtr v); - void bpropBiases(MatrixPtr v); void backward(const UpdateCallback& callback); - void bpropWeights(MatrixPtr v, int inpIdx); - void bpropActs(MatrixPtr v, int inpIdx); }; } // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvTransLayer.cpp b/paddle/gserver/layers/ExpandConvTransLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a3e160f1f4eb524d39ed90cb17f59f58c690f964 --- /dev/null +++ b/paddle/gserver/layers/ExpandConvTransLayer.cpp @@ -0,0 +1,92 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" +#include "ExpandConvTransLayer.h" + +/* The implementation of the convTransLayer is basically a swap of forward and + * backward of the original convLayer. + * The variable naming follows the convention of the convLayer.
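+ * For example, where a conv layer maps a 16x16 image to 8x8 feature maps
+ * (filter 4, stride 2, padding 1), its backward pass propagates 8x8 gradients
+ * back to a 16x16 image; exconvt uses that routine (bpropActs) as its forward
+ * pass and the conv forward routine (expandFwdOnce) as its backward pass.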
+ * */ + +namespace paddle { + +REGISTER_LAYER(exconvt, ExpandConvTransLayer); + +bool ExpandConvTransLayer::init(const LayerMap &layerMap, + const ParameterMap &parameterMap) { + /* Initialize the basic convolutional parent class */ + ExpandConvBaseLayer::init(layerMap, parameterMap); + + return true; +} + +void ExpandConvTransLayer::forward(PassType passType) { + Layer::forward(passType); + + /* malloc memory for the output_ if necessary */ + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + resetOutput(batchSize, getOutputSize()); + + MatrixPtr output = nullptr; + for (size_t i = 0; i < inputLayers_.size(); ++i) { + LayerPtr prevLayer = getPrev(i); + output = prevLayer->getOutputValue(); + REGISTER_TIMER_INFO("shrinkFwd", getName().c_str()); + bpropActs(output, getOutputValue(), i); + } + + /* add the bias-vector */ + if (biases_.get()) { + if (sharedBiases_) { + addSharedBias(); + } else { + addUnsharedBias(); + } + } + + /* activation */ + forwardActivation(); +} + +void ExpandConvTransLayer::backward(const UpdateCallback &callback) { + backwardActivation(); + + MatrixPtr imageGrad = getOutputGrad(); + if (biases_ && biases_->getWGrad()) { + bpropBiases(imageGrad); + /* Increasing the number of gradient */ + biases_->getParameterPtr()->incUpdate(callback); + } + + for (size_t i = 0; i < inputLayers_.size(); ++i) { + /* First, calculate the input layers error */ + for (size_t off = 0; off < imageGrad->getHeight(); off++) { + if (getPrev(i)->getOutputGrad()) { + expandFwdOnce(imageGrad, getPrev(i)->getOutputGrad(), i, off); + } + } + if (weights_[i]->getWGrad()) { + /* Then, calculate the W-gradient for the current layer */ + bpropWeights(imageGrad, getPrev(i)->getOutputValue(), i); + /* Increasing the number of gradient */ + weights_[i]->getParameterPtr()->incUpdate(callback); + } + } +} + + +} // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvTransLayer.h b/paddle/gserver/layers/ExpandConvTransLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..87c464a97f2edd5c3528a4434a2aa741d10ddf2e --- /dev/null +++ b/paddle/gserver/layers/ExpandConvTransLayer.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#pragma once + +#include "paddle/math/Matrix.h" +#include <vector> +#include "ExpandConvBaseLayer.h" + +namespace paddle { + +/** + * @brief A subclass of convolution layer. + * This layer expands the input and uses matrix multiplication to + * calculate the convolution transpose (deconv) operation. + * + * The config file api is img_conv_layer with flag trans=True.
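+ * e.g. img_conv_layer(input=layer, filter_size=3, num_filters=16, stride=2,
+ * padding=1, act=LinearActivation(), trans=True)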
+ */ +class ExpandConvTransLayer : public ExpandConvBaseLayer { +public: + explicit ExpandConvTransLayer(const LayerConfig& config) : + ExpandConvBaseLayer(config) {} + + ~ExpandConvTransLayer() {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + void forward(PassType passType); + void backward(const UpdateCallback& callback); +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 26ee2b3aae64abfce69b543f13ab0f4254757fd8..0651d0b4733ea9c3f54a42169774217b65091aa6 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -26,6 +26,14 @@ add_unittest_without_exec(test_ActivationGrad TestUtil.cpp) add_test(NAME test_ActivationGrad COMMAND test_ActivationGrad) +################# test_ConvTrans ####################### +add_unittest_without_exec(test_ConvTrans + test_ConvTrans.cpp + LayerGradUtil.cpp + TestUtil.cpp) + +add_test(NAME test_ConvTrans + COMMAND test_ConvTrans) ################## test_Evaluator ####################### add_unittest(test_Evaluator diff --git a/paddle/gserver/tests/test_ConvTrans.cpp b/paddle/gserver/tests/test_ConvTrans.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bff7222b29907cb66d79decea76e1b5e26205ddf --- /dev/null +++ b/paddle/gserver/tests/test_ConvTrans.cpp @@ -0,0 +1,246 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include <gtest/gtest.h> +#include <string> +#include <vector> +#include "paddle/gserver/layers/DataLayer.h" +#include "ModelConfig.pb.h" +#include "paddle/trainer/Trainer.h" +#include "paddle/utils/GlobalConstants.h" +#include "paddle/gserver/layers/ExpandConvTransLayer.h" +#include "paddle/math/MathUtils.h" + +#include "TestUtil.h" +#include "LayerGradUtil.h" + +using namespace paddle; // NOLINT +using namespace std; // NOLINT + +P_DECLARE_bool(use_gpu); +P_DECLARE_int32(gpu_id); +P_DECLARE_double(checkgrad_eps); +P_DECLARE_bool(thread_local_rand_use_global_seed); +P_DECLARE_bool(prev_batch_state); + +// Test that the convTrans forward is the same as conv backward +TEST(Layer, convTransLayerFwd) { + // Setting up conv-trans layer + TestConfig configt; + configt.biasSize = 3; + configt.layerConfig.set_type("exconvt"); + configt.layerConfig.set_num_filters(3); + configt.layerConfig.set_partial_sum(1); + configt.layerConfig.set_shared_biases(true); + + configt.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); + LayerInputConfig* input = configt.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(4); + conv->set_channels(16); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_filter_channels(3 / conv->groups()); + conv->set_img_size(16); + conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(), + conv->padding(), conv->stride(), + /* caffeMode */ true)); + configt.layerConfig.set_size(conv->img_size() * conv->img_size() * + configt.layerConfig.num_filters()); + configt.layerConfig.set_name("convTrans"); + + // data layer initialize + std::vector<DataLayerPtr> dataLayers; + LayerMap layerMap; + vector<Argument> datas; + initDataLayer(configt, &dataLayers, &datas, &layerMap, "convTrans", + 100, false, false); + // test layer initialize + std::vector<ParameterPtr> parameters; + LayerPtr convtLayer; + initTestLayer(configt, &layerMap, &parameters, &convtLayer); + convtLayer->getBiasParameter()->zeroMem(); + convtLayer->forward(PASS_GC); + + // Setting up conv-layer config + TestConfig config; + config.biasSize = 16; + config.layerConfig.set_type("exconv"); + config.layerConfig.set_num_filters(16); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + config.inputDefs.push_back({INPUT_DATA, "layer_1", 768, 384}); + input = config.layerConfig.add_inputs(); + conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(4); + conv->set_channels(3); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_filter_channels(conv->channels() / conv->groups()); + conv->set_img_size(16); + conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(), + conv->padding(), conv->stride(), + /* caffeMode */ true)); + config.layerConfig.set_size(conv->output_x() * conv->output_x() * + config.layerConfig.num_filters()); + config.layerConfig.set_name("conv"); + + // data layer initialize + std::vector<DataLayerPtr> dataLayers2; + LayerMap layerMap2; + vector<Argument> datas2; + initDataLayer(config, &dataLayers2, &datas2, &layerMap2, "conv", + 100, false, false); + // test layer initialize + std::vector<ParameterPtr> parameters2; + LayerPtr convLayer; + initTestLayer(config, &layerMap2, &parameters2, &convLayer); + + // Sync convLayer and convtLayer parameter + convLayer->getBiasParameter()->zeroMem(); + convLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->copyFrom(
*(convtLayer->getParameters()[0]->getBuf(PARAMETER_VALUE))); + + // Set convLayer outputGrad as convTransLayer input value + convLayer->forward(PASS_GC); + convLayer->getOutput().grad->copyFrom(*(dataLayers[0]->getOutputValue())); + + vector<int> callbackFlags(parameters2.size(), 0); + auto callback = [&](Parameter* para) { ++callbackFlags[para->getID()]; }; + convLayer->backward(callback); + + // Check that the convLayer backward is the same as convTransLayer forward + checkMatrixEqual(convtLayer->getOutputValue(), + dataLayers2[0]->getOutputGrad()); +} + + +// Do one forward pass of convTrans layer and check to see if its output +// matches the given result +void doOneConvtTest(size_t imgSize, size_t output_x, size_t stride, + size_t padding, size_t filter_size, MatrixPtr& result) { + TestConfig configt; + configt.biasSize = 1; + configt.layerConfig.set_type("exconvt"); + configt.layerConfig.set_num_filters(1); + configt.layerConfig.set_partial_sum(1); + configt.layerConfig.set_shared_biases(true); + + configt.inputDefs.push_back({INPUT_DATA, "layer_0", output_x * output_x, + filter_size * filter_size}); + LayerInputConfig* input = configt.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(filter_size); + conv->set_filter_size_y(filter_size); + conv->set_channels(1); + conv->set_padding(padding); + conv->set_padding_y(padding); + conv->set_stride(stride); + conv->set_stride_y(stride); + conv->set_groups(1); + conv->set_filter_channels(1); + conv->set_img_size(imgSize); + conv->set_output_x(output_x); + + configt.layerConfig.set_size(conv->img_size() * conv->img_size() * + configt.layerConfig.num_filters()); + configt.layerConfig.set_name("convTrans"); + + std::vector<DataLayerPtr> dataLayers; + LayerMap layerMap; + vector<Argument> datas; + initDataLayer(configt, &dataLayers, &datas, &layerMap, "convTrans", + 1, false, false); + dataLayers[0]->getOutputValue()->zeroMem(); + dataLayers[0]->getOutputValue()->add(1.0); + + // test layer initialize + std::vector<ParameterPtr> parameters; + LayerPtr convtLayer; + initTestLayer(configt, &layerMap, &parameters, &convtLayer); + convtLayer->getBiasParameter()->zeroMem(); + convtLayer->getParameters()[0]->zeroMem(); + convtLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->add(1.0); + convtLayer->forward(PASS_GC); + + checkMatrixEqual(convtLayer->getOutputValue(), result); +} + +TEST(Layer, convTransLayerFwd2) { + MatrixPtr result; + result = Matrix::create(1, 5 * 5, false, false); + result->zeroMem(); + result->add(1.0); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 1, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 5, + result); + + float resultData[] = {1, 2, 2, 2, 1, + 2, 4, 4, 4, 2, + 2, 4, 4, 4, 2, + 2, 4, 4, 4, 2, + 1, 2, 2, 2, 1}; + result->setData(resultData); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 2, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 4, + result); + + float resultData2[] = {1, 2, 2, 2, 1, + 2, 4, 4, 4, 2, + 2, 4, 4, 4, 2, + 2, 4, 4, 4, 2, + 1, 2, 2, 2, 1}; + result->setData(resultData2); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 2, + /* stride */ 2, + /* padding */ 1, + /* filter_size */ 5, + result); + + float resultData3[] = {1, 1, 2, 1, 1, + 1, 1, 2, 1, 1, + 2, 2, 4, 2, 2, + 1, 1, 2, 1, 1, + 1, 1, 2, 1, 1}; + result->setData(resultData3); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 2, + /* stride */ 2, + /* padding */ 0, + /* filter_size */ 3, + result); } + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + initMain(argc, argv); +
FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + return RUN_ALL_TESTS(); +} diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 4e01fa91ed2ba6b40882d9995e52e9dbeb37f57e..7b6e6fd3999ff5d3386626997effe33fa6a75429 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -302,6 +302,8 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { config.layerConfig.num_filters()); testLayerGrad(config, "conv", 100, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "conv", 2, trans, useGpu, true, 0.02); } TEST(Layer, convLayer) { @@ -312,6 +314,46 @@ TEST(Layer, convLayer) { #endif } + +void testConvTransLayer(const string& type, bool trans, bool useGpu) { + TestConfig config; + config.biasSize = 3; + config.layerConfig.set_type(type); + config.layerConfig.set_num_filters(3); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 288}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(3); + conv->set_channels(16); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_filter_channels(3 / conv->groups()); + conv->set_img_size(16); + conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(), + conv->padding(), conv->stride(), + /* caffeMode */ true)); + + config.layerConfig.set_size(conv->img_size() * conv->img_size() * + config.layerConfig.num_filters()); + + testLayerGrad(config, "convTrans", 100, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "convTrans", 2, trans, useGpu, true, 0.02); +} + +TEST(Layer, convTransLayer) { + for (auto useGpu : {false, true}) { + testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); + } +} + TEST(Layer, blockExpandLayer) { TestConfig config; config.biasSize = 0; diff --git a/paddle/math/MathUtils.cpp b/paddle/math/MathUtils.cpp index c1af8628d03c50185089b45f3a0502726da9137e..548f17936381c7e1c4d0c2c9661b197f3f06bd35 100644 --- a/paddle/math/MathUtils.cpp +++ b/paddle/math/MathUtils.cpp @@ -80,4 +80,17 @@ int outputSize(int imageSize, int filterSize, int padding, int stride, return outputSize; } +int imageSize(int outputSize, int filterSize, int padding, int stride, + bool caffeMode) { + int imageSize; + if (!caffeMode) { + imageSize = + (outputSize - 1) * stride + filterSize - 2 * padding - stride + 1; + } else { + imageSize = (outputSize - 1) * stride + filterSize - 2 * padding; + } + CHECK_GE(imageSize, 1); + return imageSize; +} + } // namespace paddle diff --git a/paddle/math/MathUtils.h b/paddle/math/MathUtils.h index 49d0c10a8f5e4dcdaf22dca77a3f113400b16646..91683dc3e9144df4664f46859ff5e2215dc34144 100644 --- a/paddle/math/MathUtils.h +++ b/paddle/math/MathUtils.h @@ -60,4 +60,11 @@ void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax, int outputSize(int imageSize, int filterSize, int padding, int stride, bool caffeMode); +/** + * Calculate image size based on output size and caffeMode_. 
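+ * E.g. in caffeMode with filterSize 4, padding 1, stride 2: an image of size
+ * 16 yields outputSize(16, 4, 1, 2, true) = (16 - 4 + 2 * 1) / 2 + 1 = 8, and
+ * imageSize(8, 4, 1, 2, true) = (8 - 1) * 2 + 4 - 2 * 1 = 16 recovers it.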
+ * It is the reverse function of outputSize() + */ +int imageSize(int outputSize, int filterSize, int padding, int stride, + bool caffeMode); + } // namespace paddle diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 73631602a92be4ef70eb783cf8cf063f2c0414c7..958bfdaf2e2839f375c796ce119de091a7cd82ae 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -649,7 +649,8 @@ class ConvProjection(Projection): parse_conv(conv_conf, input_layer_name, - self.proj_conf.conv_conf) + self.proj_conf.conv_conf, + num_filters) # TODO: support rectangle input self.proj_conf.output_size = (self.proj_conf.conv_conf.output_x ** 2) * num_filters @@ -730,7 +731,8 @@ class ConvOperator(Operator): parse_conv(conv_conf, MakeLayerNameInSubmodel(input_layer_names[0]), - self.operator_conf.conv_conf) + self.operator_conf.conv_conf, + num_filters) self.operator_conf.output_size = (self.operator_conf.conv_conf.output_x ** 2) * num_filters config_assert(len(input_layer_names) == 2, "Conv is binary operator") @@ -1017,6 +1019,17 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): else: return 1 + int(math.ceil(output)) +''' +Calculate image_size based on output_size for convolution. +It is the reverse function of cnn_output_size. +''' +def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode): + if caffe_mode: + img_size = (output_size - 1) * stride + filter_size - 2 * padding + else: + img_size = (output_size - 2) * stride + filter_size - 2 * padding + 1 + return img_size + def parse_pool(pool, input_layer_name, pool_conf): pool_conf.pool_type = pool.pool_type config_assert(pool.pool_type in ['max-projection', 'avg-projection', @@ -1082,7 +1095,11 @@ def parse_norm(norm, input_layer_name, norm_conf): else: norm_conf.scale /= norm.size ** 2 -def parse_conv(conv, input_layer_name, conv_conf): +''' +caffe_mode: compute the output size using floor instead of ceil, + which is consistent with Caffe's and cuDNN's convention.
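+ E.g. img_size 16, filter_size 5, padding 1, stride 2: caffe_mode gives
+ floor((16 - 5 + 2) / 2) + 1 = 7, while caffe_mode=False gives
+ ceil((16 - 5 + 2) / 2) + 1 = 8.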
+''' +def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): conv_conf.filter_size = conv.filter_size conv_conf.filter_size_y = conv.filter_size_y conv_conf.channels = conv.channels @@ -1091,20 +1108,37 @@ def parse_conv(conv, input_layer_name, conv_conf): conv_conf.stride = conv.stride conv_conf.stride_y = conv.stride_y conv_conf.groups = conv.groups - conv_conf.filter_channels = conv.channels / conv.groups conv_conf.caffe_mode = conv.caffe_mode - - img_pixels = g_layer_map[input_layer_name].size / conv.channels - print('channels=%d size=%d'%(conv.channels, - g_layer_map[input_layer_name].size)) - conv_conf.img_size = int(img_pixels ** 0.5) - config_assert((conv_conf.img_size ** 2) == img_pixels, - ("Input layer %s: Incorrect input image size %d for input " - + "image pixels %d") - % (input_layer_name, conv_conf.img_size, img_pixels)) - conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size, - conv_conf.padding, conv_conf.stride, - conv_conf.caffe_mode) + + if not trans: + conv_conf.filter_channels = conv.channels / conv.groups + + img_pixels = g_layer_map[input_layer_name].size / conv.channels + print('channels=%d size=%d'%(conv.channels, + g_layer_map[input_layer_name].size)) + conv_conf.img_size = int(img_pixels ** 0.5) + config_assert((conv_conf.img_size ** 2) == img_pixels, + ("Input layer %s: Incorrect input image size %d for input " + + "image pixels %d") + % (input_layer_name, conv_conf.img_size, img_pixels)) + + conv_conf.output_x = cnn_output_size( + conv_conf.img_size, conv_conf.filter_size, + conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode) + else: + conv_conf.filter_channels = num_filters / conv.groups + + outputSize = g_layer_map[input_layer_name].size / conv.channels + print('channels=%d size=%d'%(conv.channels, + g_layer_map[input_layer_name].size)) + conv_conf.output_x = int(outputSize ** 0.5) + config_assert((conv_conf.output_x ** 2) == outputSize, + ("Input layer %s: Incorrect input image size %d for input " + + "image pixels %d") + % (input_layer_name, conv_conf.output_x, outputSize)) + conv_conf.img_size = cnn_image_size( + conv_conf.output_x, conv_conf.filter_size, + conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode) def parse_block_expand(block_expand, input_layer_name, block_expand_conf): block_expand_conf.channels = block_expand.channels @@ -1587,7 +1621,8 @@ class ConvLayerBase(LayerBase): parse_conv( self.inputs[input_index].conv, input_layer.name, - self.config.inputs[input_index].conv_conf) + self.config.inputs[input_index].conv_conf, + num_filters) conv_conf = self.config.inputs[input_index].conv_conf psize = self.calc_parameter_size(conv_conf) print("output size for %s is %d " % (name, conv_conf.output_x)) @@ -1612,6 +1647,63 @@ class ConvLayer(ConvLayerBase): class ConvLayer(ConvLayerBase): layer_type = 'cudnn_conv' + +@config_layer('convt') +class ConvTransLayerBase(LayerBase): + layer_type = 'convt' + def __init__( + self, + name, + inputs=[], + bias=True, + num_filters=None, + shared_biases=False, + **xargs): + super(ConvTransLayerBase, self).__init__( + name, self.layer_type, 0, inputs=inputs, **xargs) + + if num_filters is not None: + self.config.num_filters = num_filters + + use_gpu = int(g_command_config_args.get("use_gpu", 0)) + parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) + + # cudnn_convt has not been implemented so use exconvt only + self.layer_type = "exconvt" + # need to specify layer in config + self.config.type = self.layer_type + + if shared_biases is not None: 
+ self.config.shared_biases = shared_biases + + for input_index in xrange(len(self.inputs)): + input_layer = self.get_input_layer(input_index) + parse_conv( + self.inputs[input_index].conv, + input_layer.name, + self.config.inputs[input_index].conv_conf, + num_filters, + trans=True) + conv_conf = self.config.inputs[input_index].conv_conf + psize = self.calc_parameter_size(conv_conf) + print("output size for %s is %d " % (name, conv_conf.output_x)) + self.create_input_parameter(input_index, psize) + self.set_layer_size( + (conv_conf.img_size ** 2) * self.config.num_filters) + + psize = self.config.size + if shared_biases: + psize = self.config.num_filters + self.create_bias_parameter(bias, psize, [psize, 1]) + + def calc_parameter_size(self, conv_conf): + return conv_conf.channels * conv_conf.filter_channels \ + * (conv_conf.filter_size * conv_conf.filter_size_y) + +@config_layer('exconvt') +class ConvTransLayer(ConvTransLayerBase): + layer_type = 'exconvt' + @config_layer('norm') class NormLayer(LayerBase): def __init__( diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 6b5d39a47158b3b4804a72a1c2d13c096db166ce..aa53f94dd039f07d604d83f09a29c02713062947 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -78,6 +78,7 @@ class LayerType(object): COSINE_SIM = 'cos' HSIGMOID = 'hsigmoid' CONV_LAYER = "conv" + CONVTRANS_LAYER = "convt" POOL_LAYER = "pool" BATCH_NORM_LAYER = 'batch_norm' NORM_LAYER = 'norm' @@ -1517,7 +1518,8 @@ def img_conv_layer(input, filter_size, num_filters, name=None, num_channels=None, act=None, groups=1, stride=1, padding=0, bias_attr=None, param_attr=None, shared_biases=True, layer_attr=None, - filter_size_y=None, stride_y=None, padding_y=None): + filter_size_y=None, stride_y=None, padding_y=None, + trans=False): """ Convolution layer for image. Paddle only support square input currently and thus input image's width equals height. @@ -1525,7 +1527,14 @@ The details of convolution layer, please refer UFLDL's `convolution <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ . - + + Convolution Transpose (deconv) layer for image. Paddle only supports square + input currently and thus input image's width equals height. + + The details of convolution transpose layer, + please refer to the following explanation and references therein + <http://datascience.stackexchange.com/questions/6107/what-are-deconvolutional-layers/>`_ . The num_channel means input image's channel number. It may be 1 or 3 when input is raw pixels of image(mono or RGB), or it may be the previous layer's num_filters * num_group. @@ -1575,6 +1584,8 @@ def img_conv_layer(input, filter_size, num_filters, :type shared_biases: bool :param layer_attr: Layer Extra Attribute. :type layer_attr: ExtraLayerAttribute + :param trans: True if it is a convTransLayer, False if it is a convLayer + :type trans: bool :return: LayerOutput object.
:rtype: LayerOutput """ @@ -1610,6 +1621,9 @@ def img_conv_layer(input, filter_size, num_filters, param_attr.attr["initial_std"] = init_w param_attr.attr["initial_strategy"] = 0 param_attr.attr["initial_smart"] = False + + lt = LayerType.CONVTRANS_LAYER if trans else LayerType.CONV_LAYER + Layer( name=name, inputs=Input(input.name, conv=Conv( @@ -1622,10 +1636,10 @@ def img_conv_layer(input, filter_size, num_filters, num_filters=num_filters, bias=ParamAttr.to_bias(bias_attr), shared_biases=shared_biases, - type=LayerType.CONV_LAYER, + type=lt, **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.CONV_LAYER, parents=[input], + return LayerOutput(name, lt, parents=[input], activation=act, num_filters=num_filters) diff --git a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh index 77774f6fcfafd8ba724c17204140ef8137bcc1d5..b8687e1d483715d60ef1c235e5bac5b5844afab9 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh @@ -9,7 +9,7 @@ protostr=$PWD/protostr configs=(test_fc layer_activations projections test_print_layer test_sequence_pooling test_lstmemory_layer test_grumemory_layer last_first_seq test_expand_layer test_ntm_layers test_hsigmoid -img_layers util_layers simple_rnn_layers unused_layers test_cost_layers +img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight test_maxout test_bi_grumemory math_ops) diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..077c78d2016484ce2a284ebb652abef9de64c121 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py @@ -0,0 +1,22 @@ +from paddle.trainer_config_helpers import * + +settings( + learning_rate=1e-3, + batch_size=1000 +) + +img = data_layer(name='image', size=227*227) + +# parse_conv in config_parser.py is not strictly accurate when filter_size +# is not square, so a square filter_size is set here.
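+# With trans=True the 227x227 input is treated as the conv output, so this
+# layer produces the recovered image of size (227 - 1) * 1 + 32 - 2 * 1 = 256,
+# giving 256 * 256 * 64 = 4194304 elements, as in the protostr below.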
+img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64, + filter_size=(32, 32), padding=(1, 1), stride=(1, 1), + act=LinearActivation(), trans=True) +img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) + +img_norm = img_cmrnorm_layer(input=img_bn, size=32) + +img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling()) + + +outputs(img_pool, img_norm) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..38346354080b02bebd937fd998fd3c63c8030346 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr @@ -0,0 +1,176 @@ +type: "nn" +layers { + name: "image" + type: "data" + size: 51529 + active_type: "" +} +layers { + name: "__conv_0__" + type: "exconvt" + size: 4194304 + active_type: "" + inputs { + input_layer_name: "image" + input_parameter_name: "___conv_0__.w0" + conv_conf { + filter_size: 32 + channels: 1 + stride: 1 + padding: 1 + groups: 1 + filter_channels: 64 + output_x: 227 + img_size: 256 + caffe_mode: true + filter_size_y: 32 + padding_y: 1 + stride_y: 1 + } + } + bias_parameter_name: "___conv_0__.wbias" + num_filters: 64 + shared_biases: true +} +layers { + name: "__batch_norm_0__" + type: "batch_norm" + size: 4194304 + active_type: "relu" + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w0" + image_conf { + channels: 64 + img_size: 256 + } + } + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w1" + } + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w2" + } + bias_parameter_name: "___batch_norm_0__.wbias" + moving_average_fraction: 0.9 +} +layers { + name: "__crmnorm_0__" + type: "norm" + size: 4194304 + active_type: "" + inputs { + input_layer_name: "__batch_norm_0__" + norm_conf { + norm_type: "cmrnorm-projection" + channels: 64 + size: 32 + scale: 0.0004 + pow: 0.75 + output_x: 256 + img_size: 256 + blocked: false + } + } +} +layers { + name: "__pool_0__" + type: "pool" + size: 3240000 + active_type: "" + inputs { + input_layer_name: "__conv_0__" + pool_conf { + pool_type: "max-projection" + channels: 64 + size_x: 32 + stride: 1 + output_x: 225 + img_size: 256 + padding: 0 + size_y: 32 + stride_y: 1 + output_y: 225 + img_size_y: 256 + padding_y: 0 + } + } +} +parameters { + name: "___conv_0__.w0" + size: 65536 + initial_mean: 0.0 + initial_std: 0.0441941738242 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_0__.wbias" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 64 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___batch_norm_0__.w0" + size: 64 + initial_mean: 1.0 + initial_std: 0.0 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___batch_norm_0__.w1" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false + is_static: true + is_shared: true +} +parameters { + name: "___batch_norm_0__.w2" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false + is_static: true + is_shared: true +} +parameters { + name: "___batch_norm_0__.wbias" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false +} 
+input_layer_names: "image" +output_layer_names: "__pool_0__" +output_layer_names: "__crmnorm_0__" +sub_models { + name: "root" + layer_names: "image" + layer_names: "__conv_0__" + layer_names: "__batch_norm_0__" + layer_names: "__crmnorm_0__" + layer_names: "__pool_0__" + input_layer_names: "image" + output_layer_names: "__pool_0__" + output_layer_names: "__crmnorm_0__" + is_recurrent_layer_group: false +} +
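As a quick sanity check of the size arithmetic above, the following standalone sketch restates cnn_image_size together with cnn_output_size (the latter's body is reconstructed here from the C++ outputSize() counterpart, since the patch only shows its tail) and verifies that they invert each other for the img_trans_layers.py configuration:

import math

def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
    # floor for caffe_mode, ceil otherwise, mirroring config_parser.py
    output = (2 * padding + img_size - filter_size) / float(stride)
    if caffe_mode:
        return 1 + int(math.floor(output))
    else:
        return 1 + int(math.ceil(output))

def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode):
    # the inverse of cnn_output_size, as added by this patch
    if caffe_mode:
        return (output_size - 1) * stride + filter_size - 2 * padding
    else:
        return (output_size - 2) * stride + filter_size - 2 * padding + 1

# The exconvt layer in img_trans_layers.py treats its 227x227 input as the
# conv output and recovers a 256x256 image (filter 32, padding 1, stride 1):
assert cnn_image_size(227, 32, 1, 1, True) == 256
assert cnn_output_size(256, 32, 1, 1, True) == 227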