diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp index 93b35e46a75313a31c1f379cde617c1d0d7ab68c..92a1334affba45e72022ec92811d8aefa78842d0 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -243,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline( void MKLDNNConvLayer::resetInValue( std::shared_ptr& pd, MKLDNNMatrixPtr& in) { - const MatrixPtr& inMat = inputLayers_[0]->getOutput().value; + const MatrixPtr& inMat = inputLayers_[0]->getOutputValue(); in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc()); // create buffer and reorder if input value do not match @@ -308,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue( const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); - if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { + if (cpuOutVal_->getPrimitiveDesc() != pd->dst_primitive_desc()) { + out = MKLDNNMatrix::create(nullptr, pd->dst_primitive_desc()); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); - CHECK(cvtOutVal_) << "should not be emptry"; + CHECK(cvtOutVal_) << "should not be empty"; } else { - // CPU output share the same data of MKLDNN output - cpuOut->setData(out->getData()); cpuOutVal_ = out; } + // when output is cpu device, change the mkldnn output value and make them + // share the same data. Then if next layer use inputlayer->getOutputValue() + // to obtain the input value, it will get the right data. 
+ output_.value = std::dynamic_pointer_cast(cpuOutVal_); + return; } + output_.value = std::dynamic_pointer_cast(out); } void MKLDNNConvLayer::resetBwdWgtPD( diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 11d3553abf7d5dbc2f259e382ed0b525c4747f55..cf19a155681f3a1ceb20af67245c8f2b8fa8fa73 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt, void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) { out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_); if (!outputIsOnlyMKLDNN()) { - // fc cpu output value do not need create convert - // just share point + // fc cpu output value do not need create convert, just share data getOutput(CPU_DEVICE).value->setData(out->getData()); } + output_.value = std::dynamic_pointer_cast(out); } void MKLDNNFcLayer::resetFwdPD(std::shared_ptr& pd, diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 41d74d08a90589761ae93cfe06bceb33bf9c94cf..2c382a6d4f3761e3f994936368de2775510c7111 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -127,10 +127,6 @@ public: pipelineFwd_.clear(); reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_); - if (outVal_) { - // change original output value to mkldnn output value - output_.value = std::dynamic_pointer_cast(outVal_); - } convertWeightsFromPaddle(); needResetBwd_ = true; } @@ -264,7 +260,7 @@ protected: */ virtual void resetOutGrad(MKLDNNMatrixPtr& out, mkldnn::memory::primitive_desc pd) { - CHECK(outputIsOnlyMKLDNN()) << "only support mixed with other device yet"; + CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet"; mergeGrad_ = nullptr; out = MKLDNNMatrix::create(output_.grad, pd); if (outputMap_.size() <= 1) { diff --git 
a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp index 5de23e1378836bd3baee1d9c8942a9a575c9dd06..5606aae80ce8e9a1e571d3c057c471b26a59d032 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp +++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp @@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) { const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { + out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc()); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); CHECK(cvtOutVal_) << "should not be emptry"; } else { - // CPU output share the same data of MKLDNN output - cpuOut->setData(out->getData()); cpuOutVal_ = out; } + output_.value = std::dynamic_pointer_cast(cpuOutVal_); + return; } + output_.value = std::dynamic_pointer_cast(outVal_); } void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr& pd, diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index f59618be9d09d146be52fb51cae84f4d24c15ef1..eaebdd671cfa1b37e5efe149588ca23fdc402a8e 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() { void MKLDNNTester::checkForward() { VLOG(MKLDNN_ALL) << "Check Forward"; printTopDatas(); - double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value, - refLayer_->getOutputValue()); + double delta = + compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue()); EXPECT_LE(fabs(delta), eps_); }