Separate resetFwd and resetBwd into sub-functions

f2317b67 · tensor-tang · 66fdbd0c · f2317b67 · f2317b67
Showing 2 changed files with 433 additions and 188 deletions

paddle/gserver/layers/MKLDNNConvLayer.cpp paddle/gserver/layers/MKLDNNConvLayer.cpp +327 -186

paddle/gserver/layers/MKLDNNConvLayer.h paddle/gserver/layers/MKLDNNConvLayer.h +106 -2

No files found.
--- a/paddle/gserver/layers/MKLDNNConvLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp
@@ -18,9 +18,6 @@ limitations under the License. */
 using namespace mkldnn;  // NOLINT
 typedef memory::format format;
-typedef convolution_forward conv_fwd;
-typedef convolution_backward_weights conv_bwdWgt;
-typedef convolution_backward_data conv_bwdData;
 namespace paddle {
@@ -114,30 +111,76 @@ void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
                               MKLDNNMatrixPtr& wgt,
                               MKLDNNMatrixPtr& bias,
                               MKLDNNMatrixPtr& out) {
-  pipeline.clear();
+  resetFwdPD(fwdPD_);
-  bool hasBias = biases_ && biases_->getW();
-  biasVal_ = nullptr;
+  resetFwdBuffers(fwdPD_, in, wgt, bias, out);
+  resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
+  printValueFormatFlow();
+}
+void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
+                               MKLDNNMatrixPtr& in,
+                               MKLDNNMatrixPtr& wgt,
+                               MKLDNNMatrixPtr& bias,
+                               MKLDNNMatrixPtr& out) {
+  std::shared_ptr<conv_bwdWgt::primitive_desc> bwdWgtPD;
+  std::shared_ptr<conv_bwdData::primitive_desc> bwdDataPD;
+  resetBwdWgtPD(bwdWgtPD);
+  resetBwdDataPD(bwdDataPD);
+  resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out);
+  resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out);
+  printGradFormatFlow();
+}
+void MKLDNNConvLayer::updateInputData() {
+  cpuInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
+}
+void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) {
+  weight_->getParameterPtr()->incUpdate(callback);
+  if (biases_ && biases_->getWGrad()) {
+    biases_->getParameterPtr()->incUpdate(callback);
+  }
+}
+void MKLDNNConvLayer::loadConvSettings(memory::dims& wgt,
+                                       memory::dims& bias,
+                                       memory::dims& stride,
+                                       memory::dims& dilation,
+                                       memory::dims& padL,
+                                       memory::dims& padR) {
+  wgt = (gp_ == 1) ? memory::dims{oc_, ic_, fh_, fw_}
+                   : memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_};
+  bias = memory::dims{oc_};
+  stride = memory::dims{sh_, sw_};
+  padL = memory::dims{ph_, pw_};
+  padR = getPaddingR();
+  // note: mkldnn dilation start from 0
+  dilation = memory::dims{dh_ - 1, dw_ - 1};
+}
+void MKLDNNConvLayer::resetFwdPD(
+    std::shared_ptr<conv_fwd::primitive_desc>& pd) {
  // dims for conv
  memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_};
  memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
-  memory::dims wgtDims =
+  memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
-      (gp_ == 1) ? memory::dims{oc_, ic_, fh_, fw_}
+  loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
-                 : memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_};
-  memory::dims biasDims = memory::dims{oc_};
-  memory::dims strides = {sh_, sw_};
-  // note: mkldnn dilation start from 0
-  memory::dims dilations = {dh_ - 1, dw_ - 1};
-  memory::dims padding = {ph_, pw_};
-  memory::dims padR = getPaddingR();
-  // create forward handle
+  prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring
-  prop_kind pk =
+                                        : prop_kind::forward_training;
-      passType_ == PASS_TEST ? prop_kind::forward : prop_kind::forward_training;
  algorithm algo = algorithm::convolution_direct;
  padding_kind padKind = padding_kind::zero;
  conv_fwd::desc fwdDesc =
-      hasBias ? conv_fwd::desc(pk,
+      biases_ && biases_->getW()
+          ? conv_fwd::desc(pk,
                           algo,
                           MKLDNNMatrix::createMemoryDesc(inDims),
                           MKLDNNMatrix::createMemoryDesc(wgtDims),
@@ -145,7 +188,7 @@ void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
                           MKLDNNMatrix::createMemoryDesc(outDims),
                           strides,
                           dilations,
-                               padding,
+                           padL,
                           padR,
                           padKind)
          : conv_fwd::desc(pk,
@@ -155,196 +198,309 @@ void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
                           MKLDNNMatrix::createMemoryDesc(outDims),
                           strides,
                           dilations,
-                               padding,
+                           padL,
                           padR,
                           padKind);
-  fwdPD_.reset(new conv_fwd::primitive_desc(fwdDesc, engine_));
+  pd.reset(new conv_fwd::primitive_desc(fwdDesc, engine_));
+}
-  // create mkldnn matrix
-  const MatrixPtr& wgtVal = weight_->getW();
+void MKLDNNConvLayer::resetFwdBuffers(
-  const MatrixPtr& inVal = inputLayers_[0]->getOutput().value;
+    std::shared_ptr<conv_fwd::primitive_desc>& pd,
-  const MatrixPtr& outVal = output_.value;
+    MKLDNNMatrixPtr& in,
-  wgt = MKLDNNMatrix::create(wgtVal, fwdPD_->weights_primitive_desc());
+    MKLDNNMatrixPtr& wgt,
-  in = MKLDNNMatrix::create(inVal, fwdPD_->src_primitive_desc());
+    MKLDNNMatrixPtr& bias,
-  out = MKLDNNMatrix::create(outVal, fwdPD_->dst_primitive_desc());
+    MKLDNNMatrixPtr& out) {
-  VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat();
+  CHECK(pd);
-  if (hasBias) {
+  resetInValue(pd, in);
-    const MatrixPtr& biasVal = biases_->getW();
-    bias = MKLDNNMatrix::create(biasVal, biasDims, format::x, engine_);
+  resetWgtBiasValue(pd, wgt, bias);
-    CHECK(bias->getPrimitiveDesc() == fwdPD_->bias_primitive_desc())
-        << "bias primitive desc should always be equal";
+  resetOutValue(pd, out);
+}
+void MKLDNNConvLayer::resetFwdPipeline(
+    std::vector<primitive>& pipeline,
+    std::shared_ptr<conv_fwd::primitive_desc>& pd,
+    MKLDNNMatrixPtr& in,
+    MKLDNNMatrixPtr& wgt,
+    MKLDNNMatrixPtr& bias,
+    MKLDNNMatrixPtr& out) {
+  pipeline.clear();
+  if (cvtInVal_) {
+    pipeline.push_back(*cvtInVal_);
+  }
+  if (bias) {
+    fwd_.reset(new conv_fwd(*pd, *in, *wgt, *bias, *out));
+  } else {
+    fwd_.reset(new conv_fwd(*pd, *in, *wgt, *out));
+  }
+  pipeline.push_back(*fwd_);
+  if (cvtOutVal_) {
+    pipeline.push_back(*cvtOutVal_);
  }
+}
+void MKLDNNConvLayer::resetInValue(
+    std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) {
+  const MatrixPtr& inMat = inputLayers_[0]->getOutput().value;
+  in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc());
-  // add reorder if input value do not match
+  // create buffer and reorder if input value do not match
+  cpuInVal_ = nullptr;
+  cvtInVal_ = nullptr;
  if (inputIsOnlyMKLDNN()) {
-    MKLDNNMatrixPtr dnnIn = std::dynamic_pointer_cast<MKLDNNMatrix>(inVal);
+    MKLDNNMatrixPtr dnnIn = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
    CHECK(dnnIn) << "Input should be MKLDNNMatrix";
    if (dnnIn->getPrimitiveDesc() != in->getPrimitiveDesc()) {
      CHECK_EQ(dnnIn->getFormat(), format::nc);
-      CHECK(ih_ == 1 && iw_ == 1);
+      CHECK(ih_ == 1 && iw_ == 1) << "when input is nc format";
-      dnnIn = MKLDNNMatrix::create(inVal, inDims, format::nchw, engine_);
+      // create a new one with nchw format and same data
+      memory::dims inDims = memory::dims{bs_, ic_, 1, 1};
+      dnnIn = MKLDNNMatrix::create(inMat, inDims, format::nchw, engine_);
      CHECK(dnnIn->getPrimitiveDesc() == in->getPrimitiveDesc());
    }
    in = dnnIn;
  } else {
    const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE);
+    memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_};
    cpuInVal_ = MKLDNNMatrix::create(cpuIn, inDims, format::nchw, engine_);
    if (cpuInVal_->getPrimitiveDesc() != in->getPrimitiveDesc()) {
      // create new mkldnn matrix
-      in = MKLDNNMatrix::create(nullptr, fwdPD_->src_primitive_desc());
+      in = MKLDNNMatrix::create(nullptr, pd->src_primitive_desc());
      cvtInVal_ = MKLDNNMatrix::createReorder(cpuInVal_, in);
-      CHECK(cvtInVal_);
+      CHECK(cvtInVal_) << "should not be emptry";
-      pipeline.push_back(*cvtInVal_);
    } else {
      in = cpuInVal_;
    }
  }
+}
-  // add fwd handle
+void MKLDNNConvLayer::resetWgtBiasValue(
-  if (hasBias) {
+    std::shared_ptr<conv_fwd::primitive_desc>& pd,
-    fwd_.reset(new conv_fwd(*fwdPD_, *in, *wgt, *bias, *out));
+    MKLDNNMatrixPtr& wgt,
-  } else {
+    MKLDNNMatrixPtr& bias) {
-    fwd_.reset(new conv_fwd(*fwdPD_, *in, *wgt, *out));
+  wgt = MKLDNNMatrix::create(weight_->getW(), pd->weights_primitive_desc());
+  VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat();
+  bias = nullptr;
+  if (biases_ && biases_->getW()) {
+    bias = MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc());
  }
-  pipeline.push_back(*fwd_);
+}
+// Reset the MKLDNN output value and, when the layer also has a CPU output,
+// the CPU-side matrix plus the reorder between the two.
+//   pd  : forward primitive descriptor providing the dst primitive desc
+//   out : receives the MKLDNN output matrix (also stored in output_.value)
+// Nothing is appended to the pipeline here: resetFwdPipeline() pushes
+// cvtOutVal_ whenever it is non-null.
+void MKLDNNConvLayer::resetOutValue(
+    std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& out) {
+  out = MKLDNNMatrix::create(output_.value, pd->dst_primitive_desc());
  // change original output value from cpu matrix to mkldnn matrix
  output_.value = std::dynamic_pointer_cast<Matrix>(out);
-  // add reorder if output value has cpu device and pd do not match
+  // create reorder if output value has cpu device and pd do not match
+  // drop both the CPU matrix and the reorder kept from a previous reset;
+  // a stale cvtOutVal_ would otherwise still be pushed by resetFwdPipeline()
+  cpuOutVal_ = nullptr;
+  cvtOutVal_ = nullptr;
  if (!outputIsOnlyMKLDNN()) {
    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
+    memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
    cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
    if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
      cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
-      CHECK(cvtOutVal_);
+      CHECK(cvtOutVal_) << "should not be emptry";
-      pipeline.push_back(*cvtOutVal_);
    } else {
-      // share data
+      // CPU output share the same data of MKLDNN output
      cpuOut->setData(out->getData());
      cpuOutVal_ = out;
    }
  }
-  printValueFormatFlow();
 }
-void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
+void MKLDNNConvLayer::resetBwdWgtPD(
-                               MKLDNNMatrixPtr& in,
+    std::shared_ptr<conv_bwdWgt::primitive_desc>& pd) {
-                               MKLDNNMatrixPtr& wgt,
+  memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
-                               MKLDNNMatrixPtr& bias,
+  loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
-                               MKLDNNMatrixPtr& out) {
-  pipeline.clear();
-  bool hasBias = biases_ && biases_->getWGrad();
-  /// backward weight
+  // create backward weight using input, output and weight value memory desc
  CHECK(inVal_) << "Should have input value";
  CHECK(outVal_) << "Should have output value";
  CHECK(wgtVal_) << "Should have weight value";
-  memory::dims wgtDims =
-      (gp_ == 1) ? memory::dims{oc_, ic_, fh_, fw_}
-                 : memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_};
-  memory::dims strides = {sh_, sw_};
-  memory::dims dilations = {dh_ - 1, dw_ - 1};
-  memory::dims padding = {ph_, pw_};
-  memory::dims padR = getPaddingR();
-  // create backward handle
  algorithm algo = algorithm::convolution_direct;
  padding_kind padKind = padding_kind::zero;
-  auto bwdWgtDesc =
+  auto bwdWgtDesc = biasVal_ != nullptr
-      hasBias ? conv_bwdWgt::desc(algo,
+                        ? conv_bwdWgt::desc(algo,
                                            inVal_->getMemoryDesc(),
-                                  MKLDNNMatrix::createMemoryDesc(wgtDims),
+                                            wgtVal_->getMemoryDesc(),
                                            biasVal_->getMemoryDesc(),
                                            outVal_->getMemoryDesc(),
                                            strides,
-                                  padding,
+                                            padL,
                                            padR,
                                            padKind)
                        : conv_bwdWgt::desc(algo,
                                            inVal_->getMemoryDesc(),
-                                  MKLDNNMatrix::createMemoryDesc(wgtDims),
+                                            wgtVal_->getMemoryDesc(),
                                            outVal_->getMemoryDesc(),
                                            strides,
-                                  padding,
+                                            padL,
                                            padR,
                                            padKind);
+  pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_));
-  auto bwdWgtPD = conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_);
+  CHECK(pd->src_primitive_desc() == inVal_->getPrimitiveDesc())
-  CHECK(bwdWgtPD.src_primitive_desc() == inVal_->getPrimitiveDesc())
      << "primitive desc of in value should equal";
-  CHECK(bwdWgtPD.diff_dst_primitive_desc() == outVal_->getPrimitiveDesc())
+  CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc())
      << "primitive desc of out grad should equal the out value";
-  CHECK(bwdWgtPD.diff_weights_primitive_desc() == wgtVal_->getPrimitiveDesc())
+  CHECK(pd->diff_weights_primitive_desc() == wgtVal_->getPrimitiveDesc())
      << "primitive desc of weight grad should equal the weight value";
+}
-  // create mkldnn matrix
+void MKLDNNConvLayer::resetBwdDataPD(
-  const MatrixPtr& wgtGrad = weight_->getWGrad();
+    std::shared_ptr<conv_bwdData::primitive_desc>& pd) {
-  const MatrixPtr& outGrad = output_.grad;
+  if (inputLayers_[0]->getOutput().grad == nullptr) {
-  wgt = MKLDNNMatrix::create(wgtGrad, bwdWgtPD.diff_weights_primitive_desc());
+    return;
-  out = MKLDNNMatrix::create(outGrad, bwdWgtPD.diff_dst_primitive_desc());
-  CHECK(wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc())
-      << "primitive desc of weight grad and value should be equal";
-  CHECK(out->getPrimitiveDesc() == outVal_->getPrimitiveDesc())
-      << "primitive desc of out grad and value should be equal";
-  VLOG(MKLDNN_FMTS) << "Backward weight, weight grad format: "
-                    << wgt->getFormat();
-  if (hasBias) {
-    const MatrixPtr& biasGrad = biases_->getWGrad();
-    bias = MKLDNNMatrix::create(biasGrad, bwdWgtPD.diff_bias_primitive_desc());
-    CHECK(bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc())
-        << "primitive desc of bias grad should equal the bias value";
  }
+  memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
+  loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
+  CHECK(inVal_) << "Should have input value";
+  CHECK(outVal_) << "Should have output value";
+  // create backward data using input and output value memory desc
+  // but using weight memory desc with any format
+  auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct,
+                                        inVal_->getMemoryDesc(),
+                                        MKLDNNMatrix::createMemoryDesc(wgtDims),
+                                        outVal_->getMemoryDesc(),
+                                        strides,
+                                        padL,
+                                        padR,
+                                        padding_kind::zero);
+  pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_));
+  CHECK(pd->diff_src_primitive_desc() == inVal_->getPrimitiveDesc())
+      << "primitive desc of in grad should equal the in value";
+  CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc())
+      << "primitive desc of out grad should equal";
+}
+void MKLDNNConvLayer::resetBwdBuffers(
+    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
+    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
+    MKLDNNMatrixPtr& in,
+    MKLDNNMatrixPtr& wgt,
+    MKLDNNMatrixPtr& bias,
+    MKLDNNMatrixPtr& out) {
+  CHECK(wgtPD);
+  resetOutGrad(wgtPD, out);
+  resetWgtBiasGrad(wgtPD, wgt, bias);
+  resetInGrad(dataPD, in);
+  resetWgtValBwdData(dataPD, wgtValBwdData_);
+}
+void MKLDNNConvLayer::resetBwdPipeline(
+    std::vector<primitive>& pipeline,
+    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
+    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
+    MKLDNNMatrixPtr& in,
+    MKLDNNMatrixPtr& wgt,
+    MKLDNNMatrixPtr& bias,
+    MKLDNNMatrixPtr& out) {
+  pipeline.clear();
+  if (cvtOutGrad_) {
+    pipeline.push_back(*cvtOutGrad_);
+  }
+  // add bwdWgt handle
+  if (bias) {
+    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias));
+  } else {
+    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt));
+  }
+  pipeline.push_back(*bwdWgt_);
+  if (dataPD == nullptr) {
+    return;
+  }
+  if (cvtWgtVal_) {
+    pipeline.push_back(*cvtWgtVal_);
+  }
+  // add bwdData handle
+  CHECK(wgtValBwdData_) << "Should have weight memory";
+  bwdData_.reset(new conv_bwdData(*dataPD, *out, *wgtValBwdData_, *in));
+  pipeline.push_back(*bwdData_);
+  if (cvtInGrad_) {
+    pipeline.push_back(*cvtInGrad_);
+  }
+}
+void MKLDNNConvLayer::resetOutGrad(
+    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) {
+  const MatrixPtr& outMat = output_.grad;
+  out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc());
+  CHECK(outVal_ != nullptr &&
+        out->getPrimitiveDesc() == outVal_->getPrimitiveDesc())
+      << "primitive desc of out grad and value should be equal";
  // TODO(TJ): merge outgrad
-  // add reorder if has user output grad
+  // create reorder if has output grad does not match
+  cpuOutGrad_ = nullptr;
+  cvtOutGrad_ = nullptr;
  if (!outputIsOnlyMKLDNN()) {
    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
-    memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
    // same PrimitiveDesc with cpuInVal_
    CHECK(cpuOutVal_);
    cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
    if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) {
-      outGrad->setData(cpuOut->getData());
+      outMat->setData(cpuOut->getData());
      out = cpuOutGrad_;
    } else {
      cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
      CHECK(cvtOutGrad_);
-      pipeline.push_back(*cvtOutGrad_);
    }
  }
+}
-  // add bwdWgt handle
+// Reset the MKLDNN matrices of the weight gradient and, if the layer has a
+// bias value, the bias gradient.
+//   wgtPD : backward-weights primitive descriptor
+//   wgt   : receives the weight gradient matrix
+//   bias  : receives the bias gradient matrix, or nullptr when biasVal_ is
+//           null, so that resetBwdPipeline()'s `if (bias)` agrees with the
+//           descriptor built in resetBwdWgtPD()
+void MKLDNNConvLayer::resetWgtBiasGrad(
-  if (hasBias) {
+    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
-    bwdWgt_.reset(new conv_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt, *bias));
+    MKLDNNMatrixPtr& wgt,
-  } else {
+    MKLDNNMatrixPtr& bias) {
-    bwdWgt_.reset(new conv_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt));
+  wgt = MKLDNNMatrix::create(weight_->getWGrad(),
-  }
+                             wgtPD->diff_weights_primitive_desc());
-  pipeline.push_back(*bwdWgt_);
+  CHECK(nullptr != wgtVal_ &&
+        wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc())
+      << "primitive desc of weight grad and value should be equal";
+  VLOG(MKLDNN_FMTS) << "weight grad format: " << wgt->getFormat();
+  // always clear the out-parameter first, as resetWgtBiasValue() does,
+  // so a bias grad from a previous reset cannot survive the early return
+  bias = nullptr;
-  /// backward data
+  if (biasVal_ == nullptr) {
-  const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
-  if (inGrad == nullptr) {
    return;
  }
+  bias = MKLDNNMatrix::create(biases_->getWGrad(),
+                              wgtPD->diff_bias_primitive_desc());
+  CHECK(bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc())
+      << "primitive desc of bias grad should equal the bias value";
+}
-  auto bwdDataDesc = conv_bwdData::desc(algo,
+void MKLDNNConvLayer::resetInGrad(
-                                        inVal_->getMemoryDesc(),
+    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
-                                        MKLDNNMatrix::createMemoryDesc(wgtDims),
+    MKLDNNMatrixPtr& in) {
-                                        out->getMemoryDesc(),
+  if (dataPD == nullptr) {
-                                        strides,
+    return;
-                                        padding,
+  }
-                                        padR,
-                                        padKind);
-  auto bwdDataPD = conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_);
-  CHECK(bwdDataPD.diff_src_primitive_desc() == inVal_->getPrimitiveDesc())
-      << "primitive desc of in grad should equal the in value";
-  CHECK(bwdDataPD.diff_dst_primitive_desc() == out->getPrimitiveDesc())
-      << "primitive desc of out grad should equal";
-  // create mkldnn matrix inGrad_ and reorder if necessary
  // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
-  in = MKLDNNMatrix::create(inGrad, bwdDataPD.diff_src_primitive_desc());
+  in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad,
+                            dataPD->diff_src_primitive_desc());
+  CHECK(nullptr != inVal_ &&
+        in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
+      << "primitive desc of input grad and value should be equal";
+  // create reorder if has output grad does not match
+  cpuInGrad_ = nullptr;
  cvtInGrad_ = nullptr;
  if (!inputIsOnlyMKLDNN()) {
    const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE);
@@ -360,43 +516,28 @@ void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
      in = cpuInGrad_;
    }
  }
+}
-  // create new weight value for backward data, and reorder if necessary
+// Reset the weight value used by backward data (wgtValBwdData_) and the
+// reorder that fills it from wgtVal_ when the two primitive descs differ.
+//   dataPD : backward-data primitive descriptor; nothing is done when null
+//   wgt    : not written by this body; the result is stored in
+//            wgtValBwdData_, which resetBwdBuffers() also passes as `wgt`
+// Nothing is appended to the pipeline here: resetBwdPipeline() pushes
+// cvtWgtVal_ whenever it is non-null.
+void MKLDNNConvLayer::resetWgtValBwdData(
+    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
+    MKLDNNMatrixPtr& wgt) {
+  if (dataPD == nullptr) {
+    return;
+  }
+  // create new weight value for backward data, and create reorder if necessary
  // since the primitive_desc would be different with wgtVal_
-  if (bwdDataPD.weights_primitive_desc() != wgtVal_->getPrimitiveDesc()) {
+  CHECK(wgtVal_) << "should have weight value";
+  // drop the reorder kept from a previous reset; if the formats match now,
+  // a stale cvtWgtVal_ would otherwise still be pushed by resetBwdPipeline()
+  cvtWgtVal_ = nullptr;
+  if (dataPD->weights_primitive_desc() != wgtVal_->getPrimitiveDesc()) {
    wgtValBwdData_ =
-        MKLDNNMatrix::create(nullptr, bwdDataPD.weights_primitive_desc());
+        MKLDNNMatrix::create(nullptr, dataPD->weights_primitive_desc());
    cvtWgtVal_ = MKLDNNMatrix::createReorder(wgtVal_, wgtValBwdData_);
    CHECK(cvtWgtVal_);
-    pipeline.push_back(*cvtWgtVal_);
  } else {
    wgtValBwdData_ = wgtVal_;
  }
-  VLOG(MKLDNN_FMTS) << "Backward data, weight value format: "
+  VLOG(MKLDNN_FMTS) << "weight value format for backward data: "
                    << wgtValBwdData_->getFormat();
-  // add bwdData handle
-  CHECK(wgtValBwdData_) << "Should have weight memory";
-  bwdData_.reset(new conv_bwdData(bwdDataPD, *out, *wgtValBwdData_, *in));
-  pipeline.push_back(*bwdData_);
-  // add ingrad reorder after bwdData
-  if (cvtInGrad_) {
-    pipeline.push_back(*cvtInGrad_);
-  }
-  printGradFormatFlow();
-}
-void MKLDNNConvLayer::updateInputData() {
-  cpuInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
-}
-void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) {
-  weight_->getParameterPtr()->incUpdate(callback);
-  if (biases_ && biases_->getWGrad()) {
-    biases_->getParameterPtr()->incUpdate(callback);
-  }
 }
 }  // namespace paddle
--- a/paddle/gserver/layers/MKLDNNConvLayer.h
+++ b/paddle/gserver/layers/MKLDNNConvLayer.h
@@ -18,6 +18,9 @@ limitations under the License. */
 #include "mkldnn.hpp"
 namespace paddle {
+typedef mkldnn::convolution_forward conv_fwd;
+typedef mkldnn::convolution_backward_weights conv_bwdWgt;
+typedef mkldnn::convolution_backward_data conv_bwdData;
 /**
 * @brief A subclass of MKLDNNLayer conv layer.
@@ -43,7 +46,7 @@ protected:
  std::shared_ptr<mkldnn::reorder> cvtWgtVal_;
  // save forward primitive_desc, which can be used backward
-  std::shared_ptr<mkldnn::convolution_forward::primitive_desc> fwdPD_;
+  std::shared_ptr<conv_fwd::primitive_desc> fwdPD_;
  // MKLDNNMatrixPtr which should be created from CPU Device
  MKLDNNMatrixPtr cpuInVal_;
@@ -99,7 +102,6 @@ public:
  void convertWeightsToPaddle() override;
-protected:
  void printSizeInfo() override {
    MKLDNNLayer::printSizeInfo();
    VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_
@@ -116,6 +118,7 @@ protected:
      VLOG(MKLDNN_FMTS) << " >>> " << cpuOutVal_->getFormat();
    }
  }
  void printGradFormatFlow() override {
    if (cpuInGrad_) {
      VLOG(MKLDNN_FMTS) << cpuInGrad_->getFormat() << " <<<";
@@ -126,6 +129,107 @@ protected:
    }
  }
+protected:
+  /**
+   * load the dims settings of this conv
+   */
+  void loadConvSettings(mkldnn::memory::dims& wgt,
+                        mkldnn::memory::dims& bias,
+                        mkldnn::memory::dims& stride,
+                        mkldnn::memory::dims& dilation,
+                        mkldnn::memory::dims& padL,
+                        mkldnn::memory::dims& padR);
+  /**
+   * reset the forward primitive descriptor.
+   */
+  void resetFwdPD(std::shared_ptr<conv_fwd::primitive_desc>& pd);
+  /**
+   * reset the MKLDNNMatrix buffers used in forward.
+   */
+  void resetFwdBuffers(std::shared_ptr<conv_fwd::primitive_desc>& pd,
+                       MKLDNNMatrixPtr& in,
+                       MKLDNNMatrixPtr& wgt,
+                       MKLDNNMatrixPtr& bias,
+                       MKLDNNMatrixPtr& out);
+  /**
+   * reset the forward pipeline.
+   */
+  void resetFwdPipeline(std::vector<mkldnn::primitive>& pipeline,
+                        std::shared_ptr<conv_fwd::primitive_desc>& pd,
+                        MKLDNNMatrixPtr& in,
+                        MKLDNNMatrixPtr& wgt,
+                        MKLDNNMatrixPtr& bias,
+                        MKLDNNMatrixPtr& out);
+  /**
+   * reset MKLDNNMatrix of input value
+   */
+  void resetInValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
+                    MKLDNNMatrixPtr& in);
+  /**
+   * reset MKLDNNMatrix of weight and bias value
+   */
+  void resetWgtBiasValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
+                         MKLDNNMatrixPtr& wgt,
+                         MKLDNNMatrixPtr& bias);
+  /**
+   * reset MKLDNNMatrix of output value
+   */
+  void resetOutValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
+                     MKLDNNMatrixPtr& out);
+  /**
+   * reset the backward weight primitive descriptor.
+   */
+  void resetBwdWgtPD(std::shared_ptr<conv_bwdWgt::primitive_desc>& pd);
+  /**
+   * reset the backward data primitive descriptor.
+   */
+  void resetBwdDataPD(std::shared_ptr<conv_bwdData::primitive_desc>& pd);
+  /**
+   * reset the MKLDNNMatrix buffers used in backward.
+   */
+  void resetBwdBuffers(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
+                       std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
+                       MKLDNNMatrixPtr& in,
+                       MKLDNNMatrixPtr& wgt,
+                       MKLDNNMatrixPtr& bias,
+                       MKLDNNMatrixPtr& out);
+  /**
+   * reset the backward pipeline.
+   */
+  void resetBwdPipeline(std::vector<mkldnn::primitive>& pipeline,
+                        std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
+                        std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
+                        MKLDNNMatrixPtr& in,
+                        MKLDNNMatrixPtr& wgt,
+                        MKLDNNMatrixPtr& bias,
+                        MKLDNNMatrixPtr& out);
+  /**
+   * reset MKLDNNMatrix of output grad
+   */
+  void resetOutGrad(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
+                    MKLDNNMatrixPtr& out);
+  /**
+   * reset MKLDNNMatrix of weight and bias grad
+   */
+  void resetWgtBiasGrad(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
+                        MKLDNNMatrixPtr& wgt,
+                        MKLDNNMatrixPtr& bias);
+  /**
+   * reset MKLDNNMatrix of input grad
+   */
+  void resetInGrad(std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
+                   MKLDNNMatrixPtr& in);
+  /**
+   * reset MKLDNNMatrix of weight value for backward data
+   * since the primitive_desc would be different with wgtVal_
+   */
+  void resetWgtValBwdData(std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
+                          MKLDNNMatrixPtr& wgt);
  /**
   * get padding_r according to
   * https://github.com/01org/mkl-dnn/blob/master/tests/gtests/