From 698071cc3389e6cd40fa36d8417446e62074ae26 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 13 Oct 2017 00:12:58 +0800 Subject: [PATCH] share mkldnn output value data if next layer is cpu device --- paddle/gserver/layers/MKLDNNConvLayer.cpp | 15 ++++++++++----- paddle/gserver/layers/MKLDNNFcLayer.cpp | 4 ++-- paddle/gserver/layers/MKLDNNLayer.h | 6 +----- paddle/gserver/layers/MKLDNNPoolLayer.cpp | 6 ++++-- paddle/gserver/tests/MKLDNNTester.cpp | 4 ++-- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp index 93b35e46a7..92a1334aff 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -243,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline( void MKLDNNConvLayer::resetInValue( std::shared_ptr& pd, MKLDNNMatrixPtr& in) { - const MatrixPtr& inMat = inputLayers_[0]->getOutput().value; + const MatrixPtr& inMat = inputLayers_[0]->getOutputValue(); in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc()); // create buffer and reorder if input value do not match @@ -308,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue( const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); - if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { + if (cpuOutVal_->getPrimitiveDesc() != pd->dst_primitive_desc()) { + out = MKLDNNMatrix::create(nullptr, pd->dst_primitive_desc()); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); - CHECK(cvtOutVal_) << "should not be emptry"; + CHECK(cvtOutVal_) << "should not be empty"; } else { - // CPU output share the same data of MKLDNN output - cpuOut->setData(out->getData()); cpuOutVal_ = out; } + // when output is cpu device, change the mkldnn output value and make them + // share the same data. 
Then if the next layer uses inputlayer->getOutputValue() + // to get the input value, it will get the right data. + output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_); + return; } + output_.value = std::dynamic_pointer_cast<Matrix>(out); } void MKLDNNConvLayer::resetBwdWgtPD( diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 11d3553abf..cf19a15568 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt, void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) { out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_); if (!outputIsOnlyMKLDNN()) { - // fc cpu output value do not need create convert - // just share point + // fc cpu output value do not need create convert, just share data getOutput(CPU_DEVICE).value->setData(out->getData()); } + output_.value = std::dynamic_pointer_cast<Matrix>(out); } void MKLDNNFcLayer::resetFwdPD(std::shared_ptr& pd, diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 41d74d08a9..2c382a6d4f 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -127,10 +127,6 @@ public: pipelineFwd_.clear(); reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_); - if (outVal_) { - // change original output value to mkldnn output value - output_.value = std::dynamic_pointer_cast<Matrix>(outVal_); - } convertWeightsFromPaddle(); needResetBwd_ = true; } @@ -264,7 +260,7 @@ protected: */ virtual void resetOutGrad(MKLDNNMatrixPtr& out, mkldnn::memory::primitive_desc pd) { - CHECK(outputIsOnlyMKLDNN()) << "only support mixed with other device yet"; + CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet"; mergeGrad_ = nullptr; out = MKLDNNMatrix::create(output_.grad, pd); if (outputMap_.size() <= 1) { diff --git 
a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp index 5de23e1378..5606aae80c 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp +++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp @@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) { const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { + out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc()); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); CHECK(cvtOutVal_) << "should not be emptry"; } else { - // CPU output share the same data of MKLDNN output - cpuOut->setData(out->getData()); cpuOutVal_ = out; } + output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_); + return; } + output_.value = std::dynamic_pointer_cast<Matrix>(outVal_); } void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr& pd, diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index f59618be9d..eaebdd671c 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() { void MKLDNNTester::checkForward() { VLOG(MKLDNN_ALL) << "Check Forward"; printTopDatas(); - double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value, - refLayer_->getOutputValue()); + double delta = + compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue()); EXPECT_LE(fabs(delta), eps_); } -- GitLab