Commit 698071cc authored by tensor-tang

share mkldnn output value data if the next layer is a cpu device

Parent 6715beaa
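The gist of this commit: MKLDNNLayer::forward() no longer unconditionally points output_.value at the internal MKLDNN matrix; each layer's resetOutValue() now decides which matrix to publish, so a following CPU layer that fetches its input through getOutputValue() always sees readable data. A minimal sketch of the resulting pattern, condensed from the conv and pool hunks below (hedged; the names are those in the diff, and the cast works because MKLDNNMatrix derives from Matrix in Paddle):

// Sketch only: publish whichever matrix the next layer can read.
// `out` is the MKLDNN-format output; `cpuOutVal_` is the nchw CPU view.
if (!outputIsOnlyMKLDNN()) {
  // some consumer is a CPU layer: expose the CPU-format matrix
  output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
} else {
  // all consumers are MKLDNN layers: expose the MKLDNN matrix directly
  output_.value = std::dynamic_pointer_cast<Matrix>(out);
}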
@@ -243,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline(
 void MKLDNNConvLayer::resetInValue(
     std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) {
-  const MatrixPtr& inMat = inputLayers_[0]->getOutput().value;
+  const MatrixPtr& inMat = inputLayers_[0]->getOutputValue();
   in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc());
   // create buffer and reorder if the input value does not match
@@ -308,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue(
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
     memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
     cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
-    if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
+    if (cpuOutVal_->getPrimitiveDesc() != pd->dst_primitive_desc()) {
+      out = MKLDNNMatrix::create(nullptr, pd->dst_primitive_desc());
       cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
-      CHECK(cvtOutVal_) << "should not be emptry";
+      CHECK(cvtOutVal_) << "should not be empty";
     } else {
-      // CPU output share the same data of MKLDNN output
-      cpuOut->setData(out->getData());
       cpuOutVal_ = out;
     }
+    // when the output is on CPU device, make the MKLDNN output value share
+    // the same data; then when the next layer calls getOutputValue() to
+    // fetch its input, it gets the right data
+    output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
+    return;
   }
+  output_.value = std::dynamic_pointer_cast<Matrix>(out);
 }

 void MKLDNNConvLayer::resetBwdWgtPD(
......
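In the conv hunk above, note the rebuilt reorder branch: when the primitive's destination format differs from nchw, `out` is recreated over an internal buffer (the nullptr matrix argument) and cvtOutVal_ converts it into cpuOutVal_; otherwise the two simply alias one buffer. In Paddle's MKL-DNN v0.x layers such a reorder is appended to the forward pipeline so it runs right after the convolution. A hedged sketch of that wiring (pipelineFwd_ is the layer's vector of mkldnn::primitive):

// sketch: queue the output reorder, if one was created
if (cvtOutVal_) {
  pipelineFwd_.push_back(*cvtOutVal_);  // reorder: out -> cpuOutVal_
}
// the whole pipeline is then submitted in forward(), e.g. with the
// eager stream of MKL-DNN v0.x:
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipelineFwd_).wait();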
@@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
 void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) {
   out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_);
   if (!outputIsOnlyMKLDNN()) {
-    // fc cpu output value do not need create convert
-    // just share point
+    // fc cpu output value does not need a convert; just share the data
     getOutput(CPU_DEVICE).value->setData(out->getData());
   }
+  output_.value = std::dynamic_pointer_cast<Matrix>(out);
 }

 void MKLDNNFcLayer::resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
......
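The fc layer, by contrast, needs no reorder because its output is already in plain nc format: the CPU-device matrix adopts the MKLDNN buffer through setData, after which both views alias one allocation and no copy ever happens. A hedged illustration (assuming Matrix::setData swaps only the underlying data pointer, as Paddle's Matrix does):

MatrixPtr cpuVal = getOutput(CPU_DEVICE).value;
cpuVal->setData(out->getData());              // share, don't copy
CHECK_EQ(cpuVal->getData(), out->getData());  // both see the same buffer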
@@ -127,10 +127,6 @@ public:
     pipelineFwd_.clear();
     reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
     resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
-    if (outVal_) {
-      // change original output value to mkldnn output value
-      output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
-    }
     convertWeightsFromPaddle();
     needResetBwd_ = true;
   }
@@ -264,7 +260,7 @@ protected:
    */
   virtual void resetOutGrad(MKLDNNMatrixPtr& out,
                             mkldnn::memory::primitive_desc pd) {
-    CHECK(outputIsOnlyMKLDNN()) << "only support mixed with other device yet";
+    CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet";
     mergeGrad_ = nullptr;
     out = MKLDNNMatrix::create(output_.grad, pd);
     if (outputMap_.size() <= 1) {
......
@@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
     cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
     if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
+      out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc());
       cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
       CHECK(cvtOutVal_) << "should not be empty";
     } else {
-      // CPU output share the same data of MKLDNN output
-      cpuOut->setData(out->getData());
       cpuOutVal_ = out;
     }
+    output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
+    return;
   }
+  output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
 }

 void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
......
@@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() {
 void MKLDNNTester::checkForward() {
   VLOG(MKLDNN_ALL) << "Check Forward";
   printTopDatas();
-  double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value,
-                               refLayer_->getOutputValue());
+  double delta =
+      compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue());
   EXPECT_LE(fabs(delta), eps_);
 }
......