From 1199aa6876fc8c6759e3e2be7bb89a597d88359a Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Mon, 16 Oct 2017 17:37:39 +0800
Subject: [PATCH] fix bug: clear grad and always share data when output has cpu and add activation in unit tests

---
 paddle/gserver/layers/MKLDNNConvLayer.cpp |  8 +++++---
 paddle/gserver/layers/MKLDNNLayer.h       | 19 ++++++++++++++++++-
 paddle/gserver/layers/MKLDNNPoolLayer.cpp |  8 +++++---
 paddle/gserver/tests/test_MKLDNN.cpp      |  3 +++
 4 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp
index 8b67a1ef4f..26810a6483 100644
--- a/paddle/gserver/layers/MKLDNNConvLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp
@@ -313,6 +313,7 @@ void MKLDNNConvLayer::resetOutValue(
       cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
       CHECK(cvtOutVal_) << "should not be empty";
     } else {
+      cpuOut->setData(output_.value->getData());
       cpuOutVal_ = out;
     }
     // when output is cpu device, change the mkldnn output value and make them
@@ -456,17 +457,18 @@ void MKLDNNConvLayer::resetOutGrad(
     MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
   } else {
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
+    // always share the same grad data of CPU output
+    // then the activation can get the right grad from output_.grad
+    output_.grad->setData(cpuOut->getData());
     // same PrimitiveDesc with cpuInVal_
     CHECK(cpuOutVal_);
     cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
     // create reorder if primitive desc does not match
     if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
-      out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
+      out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
       cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
       CHECK(cvtOutGrad_);
     } else {
-      // share the same data of CPU output
-      output_.grad->setData(cpuOut->getData());
       out = cpuOutGrad_;
     }
   }
diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h
index 5f9923da76..4e2753eba2 100644
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -46,6 +46,9 @@ protected:
   // backward also need reset after reset forward handle
   bool needResetBwd_;
 
+  // is output only mkldnn
+  bool outputOnlyMKLDNN_;
+
   // mkldnn engine, stream and primivtives
   mkldnn::engine engine_;
   std::shared_ptr<MKLDNNStream> stream_;
@@ -141,6 +144,9 @@ public:
         updateInputData();
       }
 
+      if (!outputOnlyMKLDNN_) {
+        clearGrads();
+      }
       stream_->submit(pipelineFwd_);
     }
 
@@ -389,7 +395,8 @@ protected:
       CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE)
          << "Only support other device is CPU yet";
     }
-    return outputOtherDevice_.size() == 0;
+    outputOnlyMKLDNN_ = outputOtherDevice_.size() == 0;
+    return outputOnlyMKLDNN_;
   }
 
   /**
@@ -398,6 +405,16 @@ protected:
   void setDevice(int id) { deviceId_ = id; }
 
 private:
+  /**
+   * clear all grad
+   */
+  void clearGrads() {
+    output_.grad->zeroMem();
+    for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
+      outputOtherDevice_[i].grad->zeroMem();
+    }
+  }
+
   /**
    * Set deviceId of the params used in this layer.
    */
diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp
index 5606aae80c..0e53e2d1b7 100644
--- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp
@@ -146,6 +146,7 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
       cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
       CHECK(cvtOutVal_) << "should not be emptry";
     } else {
+      cpuOut->setData(output_.value->getData());
       cpuOutVal_ = out;
     }
     output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
@@ -213,15 +214,16 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
     MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
   } else {
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
+    // always share the same grad data of CPU output
+    // then the activation can get the right grad from output_.grad
+    output_.grad->setData(cpuOut->getData());
     cpuOutGrad_ = MKLDNNMatrix::create(
         cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
     if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
-      out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
+      out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
       cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
       CHECK(cvtOutGrad_) << "should not be emptry";
     } else {
-      // share the same data of CPU output
-      output_.grad->setData(cpuOut->getData());
       out = cpuOutGrad_;
     }
   }
diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp
index a70b2f17f4..03515e9469 100644
--- a/paddle/gserver/tests/test_MKLDNN.cpp
+++ b/paddle/gserver/tests/test_MKLDNN.cpp
@@ -46,6 +46,7 @@ struct testFcDesc {
 
 static void getMKLDNNFcConfig(TestConfig& cfg, const testFcDesc& pm) {
   cfg.layerConfig.set_type("mkldnn_fc");
+  cfg.layerConfig.set_active_type("sigmoid");
   cfg.layerConfig.set_size(pm.oc);
   cfg.inputDefs.push_back(
       {INPUT_DATA,
@@ -86,6 +87,7 @@ struct testConvDesc {
 
 static void getMKLDNNConvConfig(TestConfig& cfg, const testConvDesc& pm) {
   cfg.layerConfig.set_type("mkldnn_conv");
+  cfg.layerConfig.set_active_type("relu");
   cfg.layerConfig.set_num_filters(pm.oc);
   cfg.layerConfig.set_size(pm.oc * pm.oh * pm.ow);
   cfg.layerConfig.set_shared_biases(true);
@@ -158,6 +160,7 @@ struct testPoolDesc {
 
 static void getMKLDNNPoolConfig(TestConfig& cfg, const testPoolDesc& pm) {
  cfg.layerConfig.set_type("mkldnn_pool");
+  cfg.layerConfig.set_active_type("relu");
   cfg.layerConfig.set_size(pm.ic * pm.oh * pm.ow);
   cfg.inputDefs.push_back(
       {INPUT_DATA,
-- 
GitLab
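
Note (not part of the patch): the fix hinges on two mechanics that are easy to miss when reading the hunks in isolation. First, output_.grad and the CPU-device output grad are made to share one underlying buffer up front (output_.grad->setData(cpuOut->getData())), so the activation's backward writes land exactly where the CPU-side consumer reads them. Second, once another device consumes the output, the shared grad buffers are zeroed before every forward pass (clearGrads()), so stale gradients from the previous iteration cannot leak into the new one. The sketch below is a minimal, self-contained illustration of those two mechanics only; ToyMatrix is a hypothetical stand-in, not the real paddle::Matrix / MKLDNNMatrix API.

```cpp
// Illustration only: ToyMatrix is a hypothetical stand-in for paddle::Matrix,
// not the real API. It mimics setData()/getData()/zeroMem() just closely
// enough to show the buffer sharing and grad clearing the patch relies on.
#include <cassert>
#include <cstring>
#include <vector>

struct ToyMatrix {
  float* data;  // non-owning view of an external buffer
  size_t len;

  void setData(float* d) { data = d; }  // alias another matrix's buffer
  float* getData() const { return data; }
  void zeroMem() { std::memset(data, 0, len * sizeof(float)); }
};

int main() {
  // Buffer owned by the CPU-device output (think getOutput(CPU_DEVICE).grad).
  std::vector<float> cpuBuffer(4, 0.0f);
  ToyMatrix cpuOutGrad{cpuBuffer.data(), cpuBuffer.size()};
  ToyMatrix outputGrad{nullptr, cpuBuffer.size()};  // plays output_.grad

  // Like output_.grad->setData(cpuOut->getData()): both now alias one buffer.
  outputGrad.setData(cpuOutGrad.getData());

  // Pretend a previous iteration left a stale gradient behind.
  outputGrad.getData()[0] = 3.0f;

  // Like clearGrads() before stream_->submit(pipelineFwd_): start from zero.
  outputGrad.zeroMem();
  assert(cpuOutGrad.getData()[0] == 0.0f);

  // The activation's backward writes into output_.grad ...
  outputGrad.getData()[1] = 0.5f;
  // ... and the CPU-device consumer sees the same value without any copy.
  assert(cpuOutGrad.getData()[1] == 0.5f);
  return 0;
}
```

Because output_.grad now always aliases the CPU buffer, the reorder destination can no longer wrap output_.grad itself, which appears to be why the reorder branch switches to MKLDNNMatrix::create(nullptr, ...) and lets the MKLDNN matrix own its storage.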