diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp
index 1ab3032316cc9fd2627bc3ed78e4ad7720b969e3..22c5fa8b7667b28b0305006c6eba0ae3af307bb4 100644
--- a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp
@@ -57,16 +57,15 @@ void MKLDNNAddtoLayer::reshape(
 }
 
 void MKLDNNAddtoLayer::resetFwd(std::vector<primitive>& pipeline,
-                                MKLDNNMatrixPtr& in,
+                                std::vector<MKLDNNMatrixPtr>& inputs,
                                 MKLDNNMatrixPtr& out) {
-  resetFwdBuffers(inVals_, biasVal_, out);
-  in = inVals_[0];
+  resetFwdBuffers(inputs, biasVal_, out);
 
   std::shared_ptr<sum::primitive_desc> fwdPD;
   std::shared_ptr<sum::primitive_desc> biasPD;
-  resetFwdPD(fwdPD, biasPD, inVals_, biasVal_, out);
+  resetFwdPD(fwdPD, biasPD, inputs, biasVal_, out);
 
-  resetFwdPipeline(pipeline, fwdPD, biasPD, inVals_, biasVal_, out);
+  resetFwdPipeline(pipeline, fwdPD, biasPD, inputs, biasVal_, out);
 }
 
 void MKLDNNAddtoLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -206,7 +205,7 @@ void MKLDNNAddtoLayer::resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
 
   inputs.resize(inputLayers_.size());
   for (size_t i = 0; i < inputs.size(); i++) {
-    resetInGrad(inputs[i], inVal_->getPrimitiveDesc(), i);
+    resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i);
     CHECK_PRIMITIVE_DESC_EQ(inputs[i], out->getPrimitiveDesc());
   }
 
diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.h b/paddle/gserver/layers/MKLDNNAddtoLayer.h
index 1406496a7aebbfc334237809da93dcde25913561..6ad33950b1b6161203c6b8ef05fff472301415a6 100644
--- a/paddle/gserver/layers/MKLDNNAddtoLayer.h
+++ b/paddle/gserver/layers/MKLDNNAddtoLayer.h
@@ -26,7 +26,6 @@ namespace paddle {
  */
 class MKLDNNAddtoLayer : public MKLDNNLayer {
 protected:
-  std::vector<MKLDNNMatrixPtr> inVals_;
   std::vector<MKLDNNMatrixPtr> inGrads_;
 
   // layer size == ic * ih * iw == oc * oh *ow, and can not be changed
@@ -53,7 +52,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
@@ -62,18 +61,6 @@ public:
 
   void updateWeights(const UpdateCallback& callback) override;
 
-  void printValueFormat() override {
-    for (size_t i = 0; i < inVals_.size(); ++i) {
-      VLOG(MKLDNN_FMTS) << i << " input: " << inVals_[i]->getFormat() << " >>>";
-    }
-    if (outVal_) {
-      VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
-    }
-    if (extOutVal_) {
-      VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
-    }
-  }
-
   void printGradFormat() override {
     if (extOutGrad_) {
       VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
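The addto changes above set the pattern for the whole patch: resetFwd now receives the full vector of input buffers, so multi-input layers stop keeping a private inVals_ member (and stop copying element 0 back into the scalar in argument). A minimal sketch of the signature change, using hypothetical stand-in types rather than the real Paddle headers:

#include <memory>
#include <vector>

struct Primitive {};                     // stand-in for mkldnn::primitive
using MatrixPtr = std::shared_ptr<int>;  // stand-in for MKLDNNMatrixPtr

// Before: exactly one input passed by reference; layers with several
// inputs kept their own vector member and wrote element 0 back to `in`.
void resetFwdOld(std::vector<Primitive>& pipeline,
                 MatrixPtr& in,
                 MatrixPtr& out) {}

// After: the base class owns the vector and passes every input at once;
// single-input layers simply read inputs[0].
void resetFwdNew(std::vector<Primitive>& pipeline,
                 std::vector<MatrixPtr>& inputs,
                 MatrixPtr& out) {}

The same mechanical substitution (inputs[0] for the old scalar) appears in every single-input layer below.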
diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
index 96e5a99f3377e0016244fea7e1eb0943e95be80b..8c8101adc4faa371b817309ab3bea2b01ff1cafe 100644
--- a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
@@ -128,7 +128,7 @@ void MKLDNNBatchNormLayer::reshape(
 }
 
 void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,
-                                    MKLDNNMatrixPtr& in,
+                                    std::vector<MKLDNNMatrixPtr>& inputs,
                                     MKLDNNMatrixPtr& out) {
   // In training phase, it will always calculate mean and var,
   // so useGlobalStats must be false.
@@ -138,11 +138,11 @@ void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,
     useGlobalStats_ = false;
   }
 
-  resetFwdBuffers(in, wgtVal_, out);
+  resetFwdBuffers(inputs[0], wgtVal_, out);
 
-  resetFwdPD(fwdPD_, in, wgtVal_, out);
+  resetFwdPD(fwdPD_, inputs[0], wgtVal_, out);
 
-  resetFwdPipeline(pipeline, fwdPD_, in, wgtVal_, out);
+  resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, out);
 }
 
 void MKLDNNBatchNormLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -256,9 +256,9 @@ void MKLDNNBatchNormLayer::resetFwdPipeline(
 void MKLDNNBatchNormLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                            MKLDNNMatrixPtr& wgt,
                                            MKLDNNMatrixPtr& out) {
-  CHECK(inVal_ && outVal_);
+  CHECK(inVals_[0] && outVal_);
   resetOutGrad(out, outVal_->getPrimitiveDesc());
-  resetInGrad(in, inVal_->getPrimitiveDesc());
+  resetInGrad(in, inVals_[0]->getPrimitiveDesc());
   if (gradScaleShift_) {
     CHECK(wgtVal_);
     resetWithMatrix(wgt, gradScaleShift_, wgtVal_->getPrimitiveDesc());
@@ -293,11 +293,12 @@ void MKLDNNBatchNormLayer::resetBwdPipeline(
   if (pd == nullptr) {
     return;
   }
-  CHECK(inVal_);
+  CHECK(inVals_[0]);
   bwdData_.reset(
       wgt && wgtVal_
-          ? new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *wgtVal_, *in, *wgt)
-          : new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *in));
+          ? new bn_bwd(
+                *pd, *inVals_[0], *mean_, *var_, *out, *wgtVal_, *in, *wgt)
+          : new bn_bwd(*pd, *inVals_[0], *mean_, *var_, *out, *in));
   pipeline.push_back(*bwdData_);
 }
 
diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.h b/paddle/gserver/layers/MKLDNNBatchNormLayer.h
index a9a425ee33bc2800fef229bb9fd814fd86070b0d..be6385635660e8245a3177b5d4cf9bbef97aa32c 100644
--- a/paddle/gserver/layers/MKLDNNBatchNormLayer.h
+++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.h
@@ -76,7 +76,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.cpp b/paddle/gserver/layers/MKLDNNConcatLayer.cpp
index a3106b0c06cca4db3e46899fb347c7b8cb9639ae..aa8ca898c8b033aa5fd362e3a637ed37e0e3db13 100644
--- a/paddle/gserver/layers/MKLDNNConcatLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConcatLayer.cpp
@@ -59,15 +59,14 @@ void MKLDNNConcatLayer::reshape(
 }
 
 void MKLDNNConcatLayer::resetFwd(std::vector<primitive>& pipeline,
-                                 MKLDNNMatrixPtr& in,
+                                 std::vector<MKLDNNMatrixPtr>& inputs,
                                  MKLDNNMatrixPtr& out) {
-  resetFwdBuffers(inVals_, out);
-  in = inVals_[0];
+  resetFwdBuffers(inputs, out);
 
   std::shared_ptr<concat::primitive_desc> fwdPD;
-  resetFwdPD(fwdPD, inVals_, out);
+  resetFwdPD(fwdPD, inputs, out);
 
-  resetFwdPipeline(pipeline, fwdPD, inVals_, out);
+  resetFwdPipeline(pipeline, fwdPD, inputs, out);
 }
 
 void MKLDNNConcatLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -157,14 +156,9 @@ void MKLDNNConcatLayer::resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
   inputs.resize(inputLayers_.size());
   for (size_t i = 0; i < inputs.size(); i++) {
     CHECK(inVals_[i]);
-    // resetInGrad will use inVal_
-    // TODO(TJ): change move inVals_ to MKLDNNLayer ans remove inVal_
-    inVal_ = inVals_[i];
     resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i);
     CHECK_PRIMITIVE_DESC_EQ(inputs[i], inVals_[i]->getPrimitiveDesc());
   }
-  // change back, inVal_ always save the input 0
-  inVal_ = inVals_[0];
 }
 
 void MKLDNNConcatLayer::resetBwdPipeline(
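The concat backward hunk above is the clearest motivation for the refactor: the deleted lines had to temporarily re-point the scalar inVal_ at each input so that the shared resetInGrad helper would validate against the right buffer, then restore it afterwards. Passing the index into the helper removes that mutable-state dance. A compilable sketch of the before and after shapes, again with stand-in types:

#include <cassert>
#include <cstddef>
#include <vector>

struct Desc { int fmt = 0; };

struct LayerSketch {
  Desc inVal;                // old: scalar member the helper peeked at
  std::vector<Desc> inVals;  // new: per-input state in the base class

  // Old shape: callers had to assign inVal before each call.
  void resetInGradOld(const Desc& pd) { assert(pd.fmt == inVal.fmt); }

  // New shape: the index travels with the call, so no member juggling.
  void resetInGradNew(const Desc& pd, std::size_t idx) {
    assert(pd.fmt == inVals[idx].fmt);
  }
};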
diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.h b/paddle/gserver/layers/MKLDNNConcatLayer.h
index 2750a6ed2940d1ddb3191b184bb5fc12b25c81cd..14863aed3c25a767727d4b480896c76efc9a67dd 100644
--- a/paddle/gserver/layers/MKLDNNConcatLayer.h
+++ b/paddle/gserver/layers/MKLDNNConcatLayer.h
@@ -26,7 +26,6 @@ namespace paddle {
  */
 class MKLDNNConcatLayer : public MKLDNNLayer {
 protected:
-  std::vector<MKLDNNMatrixPtr> inVals_;
   std::vector<MKLDNNMatrixPtr> inGrads_;
   std::vector<std::shared_ptr<mkldnn::primitive>> bwds_;
   // input channel numbers
@@ -50,7 +49,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
@@ -68,19 +67,6 @@ public:
            << ", " << ow_;
   }
 
-  void printValueFormat() override {
-    for (size_t i = 0; i < inVals_.size(); ++i) {
-      VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
-                        << ": " << inVals_[i]->getFormat() << " >>>";
-    }
-    if (outVal_) {
-      VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
-    }
-    if (extOutVal_) {
-      VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
-    }
-  }
-
   void printGradFormat() override {
     if (extOutGrad_) {
       VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp
index 5d89f230d28ab114fc3676951cbd54670bdd857c..0bacd6a9d3685d96acb9e111f3fc9067d711fc4d 100644
--- a/paddle/gserver/layers/MKLDNNConvLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp
@@ -105,13 +105,13 @@ void MKLDNNConvLayer::reshape(
 }
 
 void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
-                               MKLDNNMatrixPtr& in,
+                               std::vector<MKLDNNMatrixPtr>& inputs,
                                MKLDNNMatrixPtr& out) {
   resetFwdPD(fwdPD_);
 
-  resetFwdBuffers(fwdPD_, in, wgtVal_, biasVal_, out);
+  resetFwdBuffers(fwdPD_, inputs[0], wgtVal_, biasVal_, out);
 
-  resetFwdPipeline(pipeline, fwdPD_, in, wgtVal_, biasVal_, out);
+  resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out);
 }
 
 void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -232,14 +232,14 @@ void MKLDNNConvLayer::resetBwdWgtPD(
   loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
 
   // create backward weight using input, output and weight value memory desc
-  CHECK(inVal_) << "Should have internal input value";
+  CHECK(inVals_[0]) << "Should have internal input value";
   CHECK(outVal_) << "Should have internal output value";
   CHECK(wgtVal_) << "Should have weight value";
   algorithm algo = algorithm::convolution_direct;
   padding_kind padKind = padding_kind::zero;
   auto bwdWgtDesc = biasVal_ != nullptr
                         ? conv_bwdWgt::desc(algo,
-                                            inVal_->getMemoryDesc(),
+                                            inVals_[0]->getMemoryDesc(),
                                             wgtVal_->getMemoryDesc(),
                                             biasVal_->getMemoryDesc(),
                                             outVal_->getMemoryDesc(),
@@ -248,7 +248,7 @@ void MKLDNNConvLayer::resetBwdWgtPD(
                                             padR,
                                             padKind)
                         : conv_bwdWgt::desc(algo,
-                                            inVal_->getMemoryDesc(),
+                                            inVals_[0]->getMemoryDesc(),
                                             wgtVal_->getMemoryDesc(),
                                             outVal_->getMemoryDesc(),
                                             strides,
@@ -256,7 +256,7 @@ void MKLDNNConvLayer::resetBwdWgtPD(
                                             padR,
                                             padKind);
   pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_));
-  CHECK_PRIMITIVE_DESC_EQ(inVal_, pd->src_primitive_desc());
+  CHECK_PRIMITIVE_DESC_EQ(inVals_[0], pd->src_primitive_desc());
   CHECK_PRIMITIVE_DESC_EQ(
       outVal_,
       pd->diff_dst_primitive_desc(),
@@ -276,12 +276,12 @@ void MKLDNNConvLayer::resetBwdDataPD(
   memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
   loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
 
-  CHECK(inVal_) << "Should have internal input value";
+  CHECK(inVals_[0]) << "Should have internal input value";
   CHECK(outVal_) << "Should have internal output value";
   // create backward data using input and output value memory desc
   // but using weight memory desc with any format
   auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct,
-                                        inVal_->getMemoryDesc(),
+                                        inVals_[0]->getMemoryDesc(),
                                         MKLDNNMatrix::createMemoryDesc(wgtDims),
                                         outVal_->getMemoryDesc(),
                                         strides,
@@ -290,7 +290,7 @@ void MKLDNNConvLayer::resetBwdDataPD(
                                         padding_kind::zero);
   pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_));
   CHECK_PRIMITIVE_DESC_EQ(
-      inVal_,
+      inVals_[0],
       pd->diff_src_primitive_desc(),
       "primitive desc of in value and grad should be equal");
   CHECK_PRIMITIVE_DESC_EQ(
@@ -342,12 +342,12 @@ void MKLDNNConvLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_);
+  CHECK(inVals_[0]);
   // add bwdWgt handle
   if (bias) {
-    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias));
+    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt, *bias));
   } else {
-    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt));
+    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt));
   }
   pipeline.push_back(*bwdWgt_);
 
diff --git a/paddle/gserver/layers/MKLDNNConvLayer.h b/paddle/gserver/layers/MKLDNNConvLayer.h
index 900f42af847a222a81ddd5347aae438bba0860fe..ff416e4f31da2acb087dae34924c19ca1071586b 100644
--- a/paddle/gserver/layers/MKLDNNConvLayer.h
+++ b/paddle/gserver/layers/MKLDNNConvLayer.h
@@ -72,7 +72,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp
index ccf11e04a37e9e0a26ebdb96db5c8387930e1423..9cb1580672c18f55ffb2190263904947c1996529 100644
--- a/paddle/gserver/layers/MKLDNNFcLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp
@@ -87,13 +87,13 @@ void MKLDNNFcLayer::reshape(
 }
 
 void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
-                             MKLDNNMatrixPtr& in,
+                             std::vector<MKLDNNMatrixPtr>& inputs,
                              MKLDNNMatrixPtr& out) {
-  resetFwdBuffers(in, wgtVal_, biasVal_, out);
+  resetFwdBuffers(inputs[0], wgtVal_, biasVal_, out);
 
-  resetFwdPD(fwdPD_, in, wgtVal_, biasVal_, out);
+  resetFwdPD(fwdPD_, inputs[0], wgtVal_, biasVal_, out);
 
-  resetFwdPipeline(pipeline, fwdPD_, in, wgtVal_, biasVal_, out);
+  resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out);
 }
 
 void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -189,9 +189,9 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                     MKLDNNMatrixPtr& wgt,
                                     MKLDNNMatrixPtr& bias,
                                     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_ && outVal_);
+  CHECK(inVals_[0] && outVal_);
   resetOutGrad(out, outVal_->getPrimitiveDesc());
-  resetInGrad(in, inVal_->getPrimitiveDesc());
+  resetInGrad(in, inVals_[0]->getPrimitiveDesc());
 
   CHECK(wgtVal_);
   resetWithMatrix(wgt, weight_->getWGrad(), wgtVal_->getPrimitiveDesc());
@@ -208,14 +208,15 @@ void MKLDNNFcLayer::resetBwdWgtPD(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_);
-  fc_bwdWgt::desc bwdWgtDesc = bias ? fc_bwdWgt::desc(inVal_->getMemoryDesc(),
-                                                      wgt->getMemoryDesc(),
-                                                      bias->getMemoryDesc(),
-                                                      out->getMemoryDesc())
-                                    : fc_bwdWgt::desc(inVal_->getMemoryDesc(),
-                                                      wgt->getMemoryDesc(),
-                                                      out->getMemoryDesc());
+  CHECK(inVals_[0]);
+  fc_bwdWgt::desc bwdWgtDesc =
+      bias ? fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(),
+                             wgt->getMemoryDesc(),
+                             bias->getMemoryDesc(),
+                             out->getMemoryDesc())
+           : fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(),
+                             wgt->getMemoryDesc(),
+                             out->getMemoryDesc());
   pd.reset(new fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_));
 }
 
@@ -241,11 +242,11 @@ void MKLDNNFcLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_);
+  CHECK(inVals_[0]);
   if (bias) {
-    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias));
+    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt, *bias));
   } else {
-    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt));
+    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt));
   }
   pipeline.push_back(*bwdWgt_);
 
diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h
index a9c916ea13299306b024adb8a3ad4f7cc5ac3c46..a7ea4cd4311435f10d799b9389c390c3ed4a7f3c 100644
--- a/paddle/gserver/layers/MKLDNNFcLayer.h
+++ b/paddle/gserver/layers/MKLDNNFcLayer.h
@@ -55,7 +55,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
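The base-class diff below is where the per-input state actually lands. forward() now sizes inVals_, extInVals_, and cvtInVals_ to inputLayers_.size() before invoking the virtual resetFwd, and the conversion bookkeeping that used to be inlined moves into a prepareValueConversions() helper. A compilable sketch of that splice order, again with stand-in types:

#include <memory>
#include <vector>

struct Primitive { int id = 0; };
using ReorderPtr = std::shared_ptr<Primitive>;

// Every external-to-internal input reorder is spliced onto the front of
// the pipeline so it runs before the layer's own primitives; the output
// reorder is appended so the internal-to-external conversion runs last.
void prepareValueConversions(std::vector<Primitive>& pipeline,
                             const std::vector<ReorderPtr>& cvtInVals,
                             const ReorderPtr& cvtOutVal) {
  for (const auto& cvt : cvtInVals) {
    if (cvt) pipeline.insert(pipeline.begin(), *cvt);
  }
  if (cvtOutVal) pipeline.push_back(*cvtOutVal);
}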
diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/gserver/layers/MKLDNNLayer.cpp
index 02170ea8160d7f7ed6208d3a5144399791878ffb..99350cd012c8c955b347875b98c2157a5661ffed 100644
--- a/paddle/gserver/layers/MKLDNNLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNLayer.cpp
@@ -53,25 +53,17 @@ void MKLDNNLayer::forward(PassType passType) {
     VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
     // reset when input total sizes changed, not only the batchsize
     inputElemenCnt_ = elemenCnt;
-    pipelineFwd_.clear();
     reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
     printSizeInfo();
-    // all cpu device output grad or value share output's
+    // the output_.value and output_.grad are shared with CPU device
    shareCPUDevice();
-    resetFwd(pipelineFwd_, inVal_, outVal_);
-    // MKLDNNLayer output value should be MKLDNNMatrix
-    // so external output value is necessary.
-    // Then external input value is not necessary,
-    // since input may be mkldnn internal buffer.
-    CHECK(extOutVal_) << "external output value is necessary";
-    output_.value = std::dynamic_pointer_cast<Matrix>(extOutVal_);
-    CHECK(inVal_ && outVal_) << "internal memories are necessary";
-    if (cvtInVal_) {
-      pipelineFwd_.insert(pipelineFwd_.begin(), *cvtInVal_);
-    }
-    if (cvtOutVal_) {
-      pipelineFwd_.push_back(*cvtOutVal_);
-    }
+
+    pipelineFwd_.clear();
+    inVals_.resize(inputLayers_.size(), nullptr);
+    extInVals_.resize(inputLayers_.size(), nullptr);
+    cvtInVals_.resize(inputLayers_.size(), nullptr);
+    resetFwd(pipelineFwd_, inVals_, outVal_);
+    prepareValueConversions(pipelineFwd_);
     convertWeightsFromPaddle();
     printValueFormat();
     needResetBwd_ = true;
@@ -80,8 +72,8 @@ void MKLDNNLayer::forward(PassType passType) {
   if (inputLayers_[0]->getType() == "data" && inputLayers_.size() == 1) {
     // Update input value data when input layer is "data" type,
     // since the input value data address might be changed.
-    CHECK(extInVal_);
-    extInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
+    CHECK(extInVals_[0]);
+    extInVals_[0]->setData(getInputValue(0, CPU_DEVICE)->getData());
   }
 
   if (!outputOnlyMKLDNN_) {
@@ -141,8 +133,8 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) {
 void MKLDNNLayer::reshapeInput(int& batchsize,
                                int& height,
                                int& width,
-                               size_t inputIdx) {
-  const Argument& input = inputLayers_[inputIdx]->getOutput();
+                               size_t idx) {
+  const Argument& input = inputLayers_[idx]->getOutput();
   batchsize = input.getBatchSize();
   int h = input.getFrameHeight();
   int w = input.getFrameWidth();
@@ -176,29 +168,30 @@ void MKLDNNLayer::resetWithMatrix(MKLDNNMatrixPtr& dnn,
 void MKLDNNLayer::resetInValue(
     MKLDNNMatrixPtr& in,
     const std::shared_ptr<memory::primitive_desc>& intPD,
-    size_t inputIdx,
+    size_t idx,
     int inputChannel) {
-  cvtInVal_ = nullptr;
-  extInVal_ = nullptr;
+  cvtInVals_[idx] = nullptr;
+  extInVals_[idx] = nullptr;
   in = nullptr;
   inputChannel = inputChannel == 0 ? ic_ : inputChannel;
   CHECK_GT(bs_ * inputChannel * ih_ * iw_, 0);
   auto extPD = MKLDNNMatrix::createPrimitiveDesc(
       {bs_, inputChannel, ih_, iw_}, format::nchw, engine_);
-  const MatrixPtr& inMat = inputLayers_[inputIdx]->getOutputValue();
-  extInVal_ = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
-  CHECK_EQ(inputIsOnlyMKLDNN(), extInVal_ != nullptr);
-  if (extInVal_ == nullptr || extInVal_->getFormat() == format::nc) {
-    extInVal_ = MKLDNNMatrix::create(extPD, inMat);
+  const MatrixPtr& inMat = inputLayers_[idx]->getOutputValue();
+  extInVals_[idx] = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
+  CHECK_EQ(inputIsOnlyMKLDNN(), extInVals_[idx] != nullptr);
+  if (extInVals_[idx] == nullptr ||
+      extInVals_[idx]->getFormat() == format::nc) {
+    extInVals_[idx] = MKLDNNMatrix::create(extPD, inMat);
   }
-  in = extInVal_;
+  in = extInVals_[idx];
   if (nullptr == intPD || in->getPrimitiveDesc() == *intPD) {
     return;
   }
   // need create reorder
   in = MKLDNNMatrix::create(*intPD);
-  cvtInVal_ = MKLDNNMatrix::createReorder(extInVal_, in);
-  CHECK(cvtInVal_) << "should not be emptry";
+  cvtInVals_[idx] = MKLDNNMatrix::createReorder(extInVals_[idx], in);
+  CHECK(cvtInVals_[idx]) << "should not be emptry";
 }
 
 void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out,
@@ -220,11 +213,11 @@ void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out,
 
 void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
                               memory::primitive_desc intPD,
-                              size_t inputIdx) {
+                              size_t idx) {
   cvtInGrad_ = nullptr;
   extInGrad_ = nullptr;
   in = nullptr;
-  LayerPtr& input = inputLayers_[inputIdx];
+  LayerPtr& input = inputLayers_[idx];
   if (input->getOutputGrad() == nullptr) {
     // no need input grad
     return;
@@ -239,7 +232,7 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
   in = MKLDNNMatrix::create(intPD, inMat);
   Argument& arg = input->getOutput(this->getName());
   arg.grad = std::dynamic_pointer_cast<Matrix>(in);
-  CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD);
+  CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD);
   if (inputIsOnlyMKLDNN()) {
     return;
   }
@@ -249,10 +242,11 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
     return;
   }
   // need create reorder
-  CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat()))
+  CHECK(extInVals_[idx] != nullptr &&
+        isPaddleFormat(extInVals_[idx]->getFormat()))
       << "should have external input value and the format must be nchw(nc)";
-  extInGrad_ = MKLDNNMatrix::create(extInVal_->getPrimitiveDesc(), inMat);
-  CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD);
+  extInGrad_ = MKLDNNMatrix::create(extInVals_[idx]->getPrimitiveDesc(), inMat);
+  CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD);
   in = MKLDNNMatrix::create(intPD);
   cvtInGrad_ = MKLDNNMatrix::createReorder(in, extInGrad_);
   CHECK(cvtInGrad_);
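Note that only the value side becomes per-input in this patch: resetInValue and resetInGrad key their lookups by idx, but the gradient buffers (inGrad_, extInGrad_, cvtInGrad_) stay scalar members that are reset for one index at a time and merely validated against inVals_[idx]. A sketch of the member layout after the header diff below, with stand-in types:

#include <memory>
#include <vector>

struct MKLDNNMatrixSketch {};
using MatrixPtr = std::shared_ptr<MKLDNNMatrixSketch>;
struct Reorder {};

struct LayerStateSketch {
  // value side: one slot per input layer
  std::vector<MatrixPtr> inVals, extInVals;
  std::vector<std::shared_ptr<Reorder>> cvtInVals;
  // grad side: still single slots, reset per index by the callers
  MatrixPtr inGrad, extInGrad;
  std::shared_ptr<Reorder> cvtInGrad;
};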
diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h
index 0e271908099b0d0e513233a7130f1b199281dfde..0ae4b8087f92afe44c9715552df5d5edfd39debf 100644
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -68,17 +68,17 @@ protected:
    * When all layers are mkldnn layers, they could save internal data.
    */
   // below MKLDNNMatrix buffers are all internal buffers
-  MKLDNNMatrixPtr inVal_;
+  std::vector<MKLDNNMatrixPtr> inVals_;
   MKLDNNMatrixPtr inGrad_;
   MKLDNNMatrixPtr outVal_;
   MKLDNNMatrixPtr outGrad_;
   // below are external value and grad
-  MKLDNNMatrixPtr extInVal_;
+  std::vector<MKLDNNMatrixPtr> extInVals_;
   MKLDNNMatrixPtr extInGrad_;
   MKLDNNMatrixPtr extOutVal_;
   MKLDNNMatrixPtr extOutGrad_;
   // convert handle between external and internal buffers
-  std::shared_ptr<mkldnn::reorder> cvtInVal_;
+  std::vector<std::shared_ptr<mkldnn::reorder>> cvtInVals_;
   std::shared_ptr<mkldnn::reorder> cvtInGrad_;
   std::shared_ptr<mkldnn::reorder> cvtOutVal_;
   std::shared_ptr<mkldnn::reorder> cvtOutGrad_;
@@ -138,7 +138,7 @@ public:
    * weight and bias buffers should be coverd by child class itself
    */
   virtual void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                        MKLDNNMatrixPtr& in,
+                        std::vector<MKLDNNMatrixPtr>& inputs,
                         MKLDNNMatrixPtr& out) = 0;
 
   /**
@@ -176,10 +176,7 @@ protected:
   /**
    * reshape the input image sizes and input batchsize
    */
-  void reshapeInput(int& batchsize,
-                    int& height,
-                    int& width,
-                    size_t inputIdx = 0);
+  void reshapeInput(int& batchsize, int& height, int& width, size_t idx = 0);
 
   /**
    * reshape output image sizes
@@ -202,7 +199,7 @@ protected:
   void resetInValue(
       MKLDNNMatrixPtr& in,
       const std::shared_ptr<mkldnn::memory::primitive_desc>& intPD = nullptr,
-      size_t inputIdx = 0,
+      size_t idx = 0,
       int inputChannel = 0);
 
   /**
@@ -218,7 +215,7 @@ protected:
    */
   void resetInGrad(MKLDNNMatrixPtr& in,
                    mkldnn::memory::primitive_desc intPD,
-                   size_t inputIdx = 0);
+                   size_t idx = 0);
 
   /**
    * reset output grad from internal primitive desc.
@@ -296,17 +293,19 @@ protected:
    * print the mkldnn memory format of value
    */
   virtual void printValueFormat() {
-    if (extInVal_) {
-      VLOG(MKLDNN_FMTS) << extInVal_->getFormat() << " >>> ";
-    }
-    if (inVal_) {
-      VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>>";
+    for (size_t i = 0; i < inVals_.size(); ++i) {
+      if (!inVals_[i]) {
+        continue;
+      }
+      VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
+                        << ": " << (extInVals_[i] ? extInVals_[i]->getFormat()
+                                                  : inVals_[i]->getFormat())
+                        << " >>> " << inVals_[i]->getFormat() << " >>>";
     }
     if (outVal_) {
-      VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
-    }
-    if (extOutVal_) {
-      VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
+      VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "
+                        << (extOutVal_ ? extOutVal_->getFormat()
+                                       : outVal_->getFormat());
     }
     if (wgtVal_) {
       VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat();
@@ -437,6 +436,24 @@ private:
       outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
     }
   }
+
+  void prepareValueConversions(std::vector<mkldnn::primitive>& pipeline) {
+    // MKLDNNLayer output value should be MKLDNNMatrix
+    // so external output value is necessary.
+    // Then external input value is not necessary,
+    // since input may be mkldnn internal buffer.
+    CHECK(extOutVal_) << "external output value is necessary";
+    output_.value = std::dynamic_pointer_cast<Matrix>(extOutVal_);
+    CHECK(inVals_[0] && outVal_) << "internal memories are necessary";
+    for (size_t i = 0; i < cvtInVals_.size(); ++i) {
+      if (cvtInVals_[i]) {
+        pipeline.insert(pipeline.begin(), *cvtInVals_[i]);
+      }
+    }
+    if (cvtOutVal_) {
+      pipeline.push_back(*cvtOutVal_);
+    }
+  }
 };
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp
index 79102aba00b062562c7ea899bc7408c4f6990a8d..21a04cd5ee020986e40ebe4f24897fd64a0d98ca 100644
--- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp
@@ -74,13 +74,13 @@ void MKLDNNPoolLayer::reshape(
 }
 
 void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
-                               MKLDNNMatrixPtr& in,
+                               std::vector<MKLDNNMatrixPtr>& inputs,
                                MKLDNNMatrixPtr& out) {
-  resetFwdBuffers(in, out);
+  resetFwdBuffers(inputs[0], out);
 
-  resetFwdPD(fwdPD_, in, out);
+  resetFwdPD(fwdPD_, inputs[0], out);
 
-  resetFwdPipeline(pipeline, fwdPD_, in, out);
+  resetFwdPipeline(pipeline, fwdPD_, inputs[0], out);
 }
 
 void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -147,9 +147,9 @@ void MKLDNNPoolLayer::resetFwdPipeline(
 
 void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                       MKLDNNMatrixPtr& out) {
-  CHECK(inVal_ && outVal_);
+  CHECK(inVals_[0] && outVal_);
   resetOutGrad(out, outVal_->getPrimitiveDesc());
-  resetInGrad(in, inVal_->getPrimitiveDesc());
+  resetInGrad(in, inVals_[0]->getPrimitiveDesc());
 }
 
 void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.h b/paddle/gserver/layers/MKLDNNPoolLayer.h
index 972419c5af0ac9e67621b7c661c1751d7d9932b8..242846cbcf7e3b76f7d47ecbf63e63073d6c9521 100644
--- a/paddle/gserver/layers/MKLDNNPoolLayer.h
+++ b/paddle/gserver/layers/MKLDNNPoolLayer.h
@@ -56,7 +56,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
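Finally, with the per-input buffers in the base class, printValueFormat can log every input generically as "external >>> internal", which is why the addto and concat overrides were deleted earlier in the patch. A compilable sketch of the consolidated logging, with stand-in types in place of MKLDNNMatrix and VLOG:

#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

struct Buf { std::string fmt; };

void printValueFormat(const std::vector<Buf*>& inVals,
                      const std::vector<Buf*>& extInVals) {
  for (std::size_t i = 0; i < inVals.size(); ++i) {
    if (!inVals[i]) continue;
    // fall back to the internal format when no external buffer exists
    const std::string& ext =
        extInVals[i] ? extInVals[i]->fmt : inVals[i]->fmt;
    std::printf("Input %zu: %s >>> %s\n", i, ext.c_str(),
                inVals[i]->fmt.c_str());
  }
}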