diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp index 0f2b67fd758ec1513f42c4cb1a36f2f3915f4740..39bffc26f7ddcd159130c492115b41080e32ce7f 100644 --- a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp +++ b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp @@ -38,12 +38,13 @@ bool MKLDNNAddtoLayer::init(const LayerMap& layerMap, } void MKLDNNAddtoLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { CHECK_EQ(layerSize_, getSize()) << "this layer size can not be changed"; reshapeInput(bs, ih, iw); ic = inputLayers_[0]->getSize() / ih / iw; CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize()); - CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw); + CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(), + (size_t)bs * ic * ih * iw); for (size_t i = 0; i < inputLayers_.size(); i++) { CHECK_EQ(int64_t(bs), inputLayers_[i]->getOutput().getBatchSize()); CHECK_EQ(layerSize_, inputLayers_[i]->getSize()); @@ -57,47 +58,43 @@ void MKLDNNAddtoLayer::reshape( } void MKLDNNAddtoLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetFwdBuffers(inVals_, bias, out); - in = inVals_[0]; + resetFwdBuffers(inputs, biasVal_, out); std::shared_ptr fwdPD; std::shared_ptr biasPD; - resetFwdPD(fwdPD, biasPD, inVals_, bias, out); + resetFwdPD(fwdPD, biasPD, inputs, biasVal_, out); - resetFwdPipeline(pipeline, fwdPD, biasPD, inVals_, bias, out); + resetFwdPipeline(pipeline, fwdPD, biasPD, inputs, biasVal_, out); } void MKLDNNAddtoLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetBwdBuffers(inGrads_, bias, out); - in = inGrads_[0]; + resetBwdBuffers(inputs, biasGrad_, out); // backward only need share output grad to input grad - for (size_t i = 0; i < inGrads_.size(); i++) { - if (inGrads_[i] != nullptr) { - inGrads_[i] = out; - inputLayers_[i]->getOutputGrad()->setData(inGrads_[i]->getData()); + for (size_t i = 0; i < inputs.size(); i++) { + if (inputs[i] != nullptr) { + inputs[i] = out; + inputLayers_[i]->getOutputGrad()->setData(inputs[i]->getData()); } } // backward bias bwdBias_ = nullptr; - if (bias) { + if (biasGrad_) { std::vector scales(bs_, 1.0); - std::vector srcPDs(bs_, bias->getPrimitiveDesc()); - auto biasPD = sum::primitive_desc(bias->getMemoryDesc(), scales, srcPDs); + std::vector srcPDs(bs_, + biasGrad_->getPrimitiveDesc()); + auto biasPD = + sum::primitive_desc(biasGrad_->getMemoryDesc(), scales, srcPDs); std::vector srcs; for (size_t i = 0; i < grads_.size(); ++i) { srcs.push_back(*(grads_[i])); } - bwdBias_.reset(new sum(biasPD, srcs, *bias)); + bwdBias_.reset(new sum(biasPD, srcs, *biasGrad_)); pipeline.push_back(*bwdBias_); } } @@ -208,7 +205,7 @@ void MKLDNNAddtoLayer::resetBwdBuffers(std::vector& inputs, inputs.resize(inputLayers_.size()); for (size_t i = 0; i < inputs.size(); i++) { - resetInGrad(inputs[i], inVal_->getPrimitiveDesc(), i); + resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i); CHECK_PRIMITIVE_DESC_EQ(inputs[i], out->getPrimitiveDesc()); } diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.h b/paddle/gserver/layers/MKLDNNAddtoLayer.h index 24504b7b4f50726e2b2757ca3029461cdc27b411..0ea3e208e5fab8cbed8b53390a9381e6f2bb5733 100644 --- a/paddle/gserver/layers/MKLDNNAddtoLayer.h +++ b/paddle/gserver/layers/MKLDNNAddtoLayer.h @@ -26,9 +26,6 @@ namespace paddle { */ class MKLDNNAddtoLayer : public MKLDNNLayer { protected: - std::vector inVals_; - std::vector inGrads_; - // layer size == ic * ih * iw == oc * oh *ow, and can not be changed size_t layerSize_; @@ -50,52 +47,19 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; - void printValueFormat() override { - for (size_t i = 0; i < inVals_.size(); ++i) { - VLOG(MKLDNN_FMTS) << i << " input: " << inVals_[i]->getFormat() << " >>>"; - } - if (outVal_) { - VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "; - } - if (extOutVal_) { - VLOG(MKLDNN_FMTS) << extOutVal_->getFormat(); - } - } - - void printGradFormat() override { - if (extOutGrad_) { - VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat(); - } - if (outGrad_) { - VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "; - } - for (size_t i = 0; i < inGrads_.size(); ++i) { - VLOG(MKLDNN_FMTS) << i << " input: " << inGrads_[i]->getFormat() << "<<<"; - } - } - protected: - /** - * Forward functions: reset buffers(inputs, output, bias), - * reset primitive descriptor, - * reset pipeline. - */ void resetFwdBuffers(std::vector& inputs, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); @@ -110,17 +74,10 @@ protected: std::vector& inputs, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(inputs, output, bias) - */ void resetBwdBuffers(std::vector& inputs, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - /** - * prepare for bias - */ void prepareBias(MKLDNNMatrixPtr& bias, const MatrixPtr& biasMat, const MKLDNNMatrixPtr& out, diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp index 071bdf54d5dc9538d5ced580a73b9c0fbcea41fb..d66c361ae05e4a1089786e4620d2eb2218a8a29c 100644 --- a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp +++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp @@ -116,21 +116,20 @@ void MKLDNNBatchNormLayer::calMovingMeanAndVar() { } void MKLDNNBatchNormLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); oh = ih; ow = iw; // ic_ and oc can not be changed - CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic) + CHECK_EQ((size_t)ic, + inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw) << "Input channel can not be changed"; reshapeOutput(oh, ow); resizeOutput(bs, oc * oh * ow); } void MKLDNNBatchNormLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { // In training phase, it will always calculate mean and var, // so useGlobalStats must be false. @@ -140,25 +139,23 @@ void MKLDNNBatchNormLayer::resetFwd(std::vector& pipeline, useGlobalStats_ = false; } - resetFwdBuffers(in, wgt, out); + resetFwdBuffers(inputs[0], wgtVal_, out); - resetFwdPD(fwdPD_, in, wgt, out); + resetFwdPD(fwdPD_, inputs[0], wgtVal_, out); - resetFwdPipeline(pipeline, fwdPD_, in, wgt, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, out); } void MKLDNNBatchNormLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr pd; - resetBwdBuffers(in, wgt, out); + resetBwdBuffers(inputs[0], wgtGrad_, out); - resetBwdPD(pd, in, wgt, out); + resetBwdPD(pd, inputs[0], wgtGrad_, out); - resetBwdPipeline(pipeline, pd, in, wgt, out); + resetBwdPipeline(pipeline, pd, inputs[0], wgtGrad_, out); } void MKLDNNBatchNormLayer::forward(PassType passType) { @@ -260,9 +257,9 @@ void MKLDNNBatchNormLayer::resetFwdPipeline( void MKLDNNBatchNormLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out) { - CHECK(inVal_ && outVal_); + CHECK(inVals_[0] && outVal_); resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVal_->getPrimitiveDesc()); + resetInGrad(in, inVals_[0]->getPrimitiveDesc()); if (gradScaleShift_) { CHECK(wgtVal_); resetWithMatrix(wgt, gradScaleShift_, wgtVal_->getPrimitiveDesc()); @@ -297,11 +294,12 @@ void MKLDNNBatchNormLayer::resetBwdPipeline( if (pd == nullptr) { return; } - CHECK(inVal_); + CHECK(inVals_[0]); bwdData_.reset( wgt && wgtVal_ - ? new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *wgtVal_, *in, *wgt) - : new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *in)); + ? new bn_bwd( + *pd, *inVals_[0], *mean_, *var_, *out, *wgtVal_, *in, *wgt) + : new bn_bwd(*pd, *inVals_[0], *mean_, *var_, *out, *in)); pipeline.push_back(*bwdData_); } diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.h b/paddle/gserver/layers/MKLDNNBatchNormLayer.h index 456c0424ecb8dde17f98a900c5d77268cc672e34..387c58f02298b0441cc3bbbc4879eed6d892164c 100644 --- a/paddle/gserver/layers/MKLDNNBatchNormLayer.h +++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.h @@ -73,18 +73,14 @@ public: void forward(PassType passType) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; @@ -98,11 +94,7 @@ protected: * moving = moving * AvgFraction + local * (1 - AvgFraction) */ void calMovingMeanAndVar(); - /** - * Forward functions: reset buffers(input, weight, output), - * reset primitive descriptor, - * reset pipeline. - */ + void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out); @@ -115,12 +107,6 @@ protected: MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(input, weight, output), - * reset primitive descriptor, - * reset pipeline. - */ void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out); diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.cpp b/paddle/gserver/layers/MKLDNNConcatLayer.cpp index c9099297cc5c741fbae0b42f21b988e6c561ef11..44bb0883b89c712d70e2d4fdfe16bdfde86f81b7 100644 --- a/paddle/gserver/layers/MKLDNNConcatLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConcatLayer.cpp @@ -32,17 +32,16 @@ bool MKLDNNConcatLayer::init(const LayerMap& layerMap, } void MKLDNNConcatLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); ic = inputLayers_[0]->getSize() / ih / iw; CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize()); - CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw); + CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(), + (size_t)bs * ic * ih * iw); CHECK_GT(inputLayers_.size(), 1UL); channels_.resize(inputLayers_.size()); channels_[0] = ic; - // need change the output channel, so use oc_ instead - // TODO(TJ): change API, use &oc - oc_ = ic; + oc = ic; for (size_t i = 1; i < inputLayers_.size(); i++) { int batchsize, height, witdh; reshapeInput(batchsize, height, witdh, i); @@ -52,37 +51,31 @@ void MKLDNNConcatLayer::reshape( channels_[i] = inputLayers_[i]->getSize() / height / witdh; CHECK_EQ((size_t)channels_[i] * height * witdh, inputLayers_[i]->getSize()); - oc_ += channels_[i]; + oc += channels_[i]; } oh = ih; ow = iw; reshapeOutput(oh, ow); - resizeOutput(bs, oc_ * oh * ow); + resizeOutput(bs, oc * oh * ow); } void MKLDNNConcatLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetFwdBuffers(inVals_, out); - in = inVals_[0]; + resetFwdBuffers(inputs, out); std::shared_ptr fwdPD; - resetFwdPD(fwdPD, inVals_, out); + resetFwdPD(fwdPD, inputs, out); - resetFwdPipeline(pipeline, fwdPD, inVals_, out); + resetFwdPipeline(pipeline, fwdPD, inputs, out); } void MKLDNNConcatLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetBwdBuffers(inGrads_, out); - in = inGrads_[0]; + resetBwdBuffers(inputs, out); - resetBwdPipeline(pipeline, bwds_, inGrads_, out); + resetBwdPipeline(pipeline, bwds_, inputs, out); } void MKLDNNConcatLayer::resetFwdBuffers(std::vector& inputs, @@ -90,10 +83,7 @@ void MKLDNNConcatLayer::resetFwdBuffers(std::vector& inputs, inputs.resize(inputLayers_.size()); bool has8c = false, has16c = false, hasnc = false; for (size_t i = 0; i < inputs.size(); i++) { - // resetInValue will use ic_ so temporary change as current input's channel - // TODO(TJ): change ic_ as vector then can remove channels_ - ic_ = channels_[i]; - resetInValue(inputs[i], nullptr, i); + resetInValue(inputs[i], nullptr, i, channels_[i]); CHECK(inputs[i]); auto dm = inputs[i]->getDims(); // inputs format can be different, but ndims must equal @@ -114,8 +104,6 @@ void MKLDNNConcatLayer::resetFwdBuffers(std::vector& inputs, has16c = true; } } - // change back, ic_ always save the input 0 size - ic_ = channels_[0]; format outFmt; if (has16c && oc_ % 16 == 0) { @@ -168,14 +156,9 @@ void MKLDNNConcatLayer::resetBwdBuffers(std::vector& inputs, inputs.resize(inputLayers_.size()); for (size_t i = 0; i < inputs.size(); i++) { CHECK(inVals_[i]); - // resetInGrad will use inVal_ - // TODO(TJ): change move inVals_ to MKLDNNLayer ans remove inVal_ - inVal_ = inVals_[i]; resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i); CHECK_PRIMITIVE_DESC_EQ(inputs[i], inVals_[i]->getPrimitiveDesc()); } - // change back, inVal_ always save the input 0 - inVal_ = inVals_[0]; } void MKLDNNConcatLayer::resetBwdPipeline( diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.h b/paddle/gserver/layers/MKLDNNConcatLayer.h index d5749d327e4259b81541a234f48a4538ab035fe4..37f3a26c5ed5db10cdba507368874c9557fb75ef 100644 --- a/paddle/gserver/layers/MKLDNNConcatLayer.h +++ b/paddle/gserver/layers/MKLDNNConcatLayer.h @@ -26,8 +26,6 @@ namespace paddle { */ class MKLDNNConcatLayer : public MKLDNNLayer { protected: - std::vector inVals_; - std::vector inGrads_; std::vector> bwds_; // input channel numbers std::vector channels_; @@ -47,18 +45,14 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void printSizeInfo() override { @@ -72,38 +66,16 @@ public: << ", " << ow_; } - void printValueFormat() override { - for (size_t i = 0; i < inVals_.size(); ++i) { - VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() - << ": " << inVals_[i]->getFormat() << " >>>"; - } - if (outVal_) { - VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "; - } - if (extOutVal_) { - VLOG(MKLDNN_FMTS) << extOutVal_->getFormat(); - } - } - - void printGradFormat() override { - if (extOutGrad_) { - VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat(); - } - if (outGrad_) { - VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "; - } - for (size_t i = 0; i < inGrads_.size(); ++i) { - VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() - << ": " << inGrads_[i]->getFormat() << "<<<"; + size_t keepCondition() { + // reset when the total element size of all inputs changed + size_t totalSize = inputLayers_[0]->getOutputValue()->getElementCnt(); + for (size_t i = 1; i < inputLayers_.size(); ++i) { + totalSize += inputLayers_[i]->getOutputValue()->getElementCnt(); } + return totalSize; } protected: - /** - * Forward functions: reset buffers(inputs, output, bias), - * reset primitive descriptor, - * reset pipeline. - */ void resetFwdBuffers(std::vector& inputs, MKLDNNMatrixPtr& out); void resetFwdPD(std::shared_ptr& pd, @@ -113,11 +85,6 @@ protected: std::shared_ptr& pd, std::vector& inputs, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(inputs, output, bias) - * reset primitives and pipeline - */ void resetBwdBuffers(std::vector& inputs, MKLDNNMatrixPtr& out); void resetBwdPipeline(std::vector& pipeline, diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp index 8aa54e0a9efa7adb766cbb6009f6a29410c6ae7d..ab1d0f7b049a349c00c6e23deb37d789382de64f 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -90,7 +90,7 @@ void MKLDNNConvLayer::convertWeightsToPaddle() { } void MKLDNNConvLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); // cal output sizes @@ -105,21 +105,17 @@ void MKLDNNConvLayer::reshape( } void MKLDNNConvLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { resetFwdPD(fwdPD_); - resetFwdBuffers(fwdPD_, in, wgt, bias, out); + resetFwdBuffers(fwdPD_, inputs[0], wgtVal_, biasVal_, out); - resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out); } void MKLDNNConvLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr bwdWgtPD; std::shared_ptr bwdDataPD; @@ -128,9 +124,10 @@ void MKLDNNConvLayer::resetBwd(std::vector& pipeline, resetBwdDataPD(bwdDataPD); - resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out); + resetBwdBuffers(bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); - resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + resetBwdPipeline( + pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); } void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) { @@ -236,14 +233,14 @@ void MKLDNNConvLayer::resetBwdWgtPD( loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); // create backward weight using input, output and weight value memory desc - CHECK(inVal_) << "Should have internal input value"; + CHECK(inVals_[0]) << "Should have internal input value"; CHECK(outVal_) << "Should have internal output value"; CHECK(wgtVal_) << "Should have weight value"; algorithm algo = algorithm::convolution_direct; padding_kind padKind = padding_kind::zero; auto bwdWgtDesc = biasVal_ != nullptr ? conv_bwdWgt::desc(algo, - inVal_->getMemoryDesc(), + inVals_[0]->getMemoryDesc(), wgtVal_->getMemoryDesc(), biasVal_->getMemoryDesc(), outVal_->getMemoryDesc(), @@ -252,7 +249,7 @@ void MKLDNNConvLayer::resetBwdWgtPD( padR, padKind) : conv_bwdWgt::desc(algo, - inVal_->getMemoryDesc(), + inVals_[0]->getMemoryDesc(), wgtVal_->getMemoryDesc(), outVal_->getMemoryDesc(), strides, @@ -260,7 +257,7 @@ void MKLDNNConvLayer::resetBwdWgtPD( padR, padKind); pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); - CHECK_PRIMITIVE_DESC_EQ(inVal_, pd->src_primitive_desc()); + CHECK_PRIMITIVE_DESC_EQ(inVals_[0], pd->src_primitive_desc()); CHECK_PRIMITIVE_DESC_EQ( outVal_, pd->diff_dst_primitive_desc(), @@ -280,12 +277,12 @@ void MKLDNNConvLayer::resetBwdDataPD( memory::dims wgtDims, biasDims, strides, dilations, padL, padR; loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); - CHECK(inVal_) << "Should have internal input value"; + CHECK(inVals_[0]) << "Should have internal input value"; CHECK(outVal_) << "Should have internal output value"; // create backward data using input and output value memory desc // but using weight memory desc with any format auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct, - inVal_->getMemoryDesc(), + inVals_[0]->getMemoryDesc(), MKLDNNMatrix::createMemoryDesc(wgtDims), outVal_->getMemoryDesc(), strides, @@ -294,7 +291,7 @@ void MKLDNNConvLayer::resetBwdDataPD( padding_kind::zero); pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_)); CHECK_PRIMITIVE_DESC_EQ( - inVal_, + inVals_[0], pd->diff_src_primitive_desc(), "primitive desc of in value and grad should be equal"); CHECK_PRIMITIVE_DESC_EQ( @@ -346,12 +343,12 @@ void MKLDNNConvLayer::resetBwdPipeline( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_); + CHECK(inVals_[0]); // add bwdWgt handle if (bias) { - bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias)); + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt, *bias)); } else { - bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt)); + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt)); } pipeline.push_back(*bwdWgt_); diff --git a/paddle/gserver/layers/MKLDNNConvLayer.h b/paddle/gserver/layers/MKLDNNConvLayer.h index 9c69136684e5f9005860b476ec6ed1bbc9ceff6c..3e754a0e65771879e836c13d63d5a5c8be3a699a 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.h +++ b/paddle/gserver/layers/MKLDNNConvLayer.h @@ -69,18 +69,14 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; @@ -107,48 +103,26 @@ protected: mkldnn::memory::dims& padL, mkldnn::memory::dims& padR); - /** - * reset the forward primitive descriptor. - */ void resetFwdPD(std::shared_ptr& pd); - /** - * reset the MKLDNNMatrix buffers used in forward. - */ void resetFwdBuffers(std::shared_ptr& pd, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - /** - * reset the forward pipeline. - */ void resetFwdPipeline(std::vector& pipeline, std::shared_ptr& pd, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - - /** - * reset the backward weight primitive descriptor. - */ void resetBwdWgtPD(std::shared_ptr& pd); - /** - * reset the backward data primitive descriptor. - */ void resetBwdDataPD(std::shared_ptr& pd); - /** - * reset the MKLDNNMatrix buffers used in backward. - */ void resetBwdBuffers(std::shared_ptr& wgtPD, std::shared_ptr& dataPD, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - /** - * reset the backward pipeline. - */ void resetBwdPipeline(std::vector& pipeline, std::shared_ptr& wgtPD, std::shared_ptr& dataPD, diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 350ec65fffbc73c3a6e4245f763f4c6aa868f574..c8778bdd077c4b6d170140be92bdcdd7e8e81bb2 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -74,7 +74,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { } void MKLDNNFcLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); @@ -87,32 +87,29 @@ void MKLDNNFcLayer::reshape( } void MKLDNNFcLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetFwdBuffers(in, wgt, bias, out); + resetFwdBuffers(inputs[0], wgtVal_, biasVal_, out); - resetFwdPD(fwdPD_, in, wgt, bias, out); + resetFwdPD(fwdPD_, inputs[0], wgtVal_, biasVal_, out); - resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out); } void MKLDNNFcLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr bwdWgtPD; std::shared_ptr bwdDataPD; - resetBwdBuffers(in, wgt, bias, out); + resetBwdBuffers(inputs[0], wgtGrad_, biasGrad_, out); - resetBwdWgtPD(bwdWgtPD, wgt, bias, out); + resetBwdWgtPD(bwdWgtPD, wgtGrad_, biasGrad_, out); - resetBwdDataPD(bwdDataPD, in, out); + resetBwdDataPD(bwdDataPD, inputs[0], out); - resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + resetBwdPipeline( + pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); } void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { @@ -193,9 +190,9 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_ && outVal_); + CHECK(inVals_[0] && outVal_); resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVal_->getPrimitiveDesc()); + resetInGrad(in, inVals_[0]->getPrimitiveDesc()); CHECK(wgtVal_); resetWithMatrix(wgt, weight_->getWGrad(), wgtVal_->getPrimitiveDesc()); @@ -212,14 +209,15 @@ void MKLDNNFcLayer::resetBwdWgtPD( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_); - fc_bwdWgt::desc bwdWgtDesc = bias ? fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - bias->getMemoryDesc(), - out->getMemoryDesc()) - : fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); + CHECK(inVals_[0]); + fc_bwdWgt::desc bwdWgtDesc = + bias ? fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(), + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) + : fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(), + wgt->getMemoryDesc(), + out->getMemoryDesc()); pd.reset(new fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); } @@ -245,11 +243,11 @@ void MKLDNNFcLayer::resetBwdPipeline( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_); + CHECK(inVals_[0]); if (bias) { - bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias)); + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt, *bias)); } else { - bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt)); + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt)); } pipeline.push_back(*bwdWgt_); diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index ee861763ff3dc10ddb4c119358b80dbe1614aecb..283dc9b540531f6009ae6e2485b7c12d4e5cf2e3 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -52,18 +52,14 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; @@ -73,11 +69,6 @@ public: void convertWeightsToPaddle() override; protected: - /** - * Forward functions: reset buffers(input, output, weight and bias), - * reset primitive descriptor, - * reset pipeline. - */ void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, @@ -93,13 +84,6 @@ protected: MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(input, output, weight and bias), - * reset primitive descriptor for backward weight, - * reset primitive descriptor for backward data, - * reset pipeline. - */ void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/gserver/layers/MKLDNNLayer.cpp index cf42da0735282d667d6b87061c8c59bf2f96e0be..28969d01a13b7831794cef856af11ad2ec01c31e 100644 --- a/paddle/gserver/layers/MKLDNNLayer.cpp +++ b/paddle/gserver/layers/MKLDNNLayer.cpp @@ -48,31 +48,20 @@ void MKLDNNLayer::forward(PassType passType) { REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); CHECK(!inputLayers_.empty()); copySeqInfoToOutputs(); - size_t elemenCnt = inputLayers_[0]->getOutputValue()->getElementCnt(); - if (inputElemenCnt_ != elemenCnt) { + if (condition_ != keepCondition()) { VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward"; - // reset when input total sizes changed, not only the batchsize - inputElemenCnt_ = elemenCnt; - pipelineFwd_.clear(); + condition_ = keepCondition(); reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); - // all cpu device output grad or value share output's + printSizeInfo(); + // the output_.value and output_.grad are shared with CPU device shareCPUDevice(); - resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_); - // MKLDNNLayer output value should be MKLDNNMatrix - // so external output value is necessary. - // Then external input value is not necessary, - // since input may be mkldnn internal buffer. - CHECK(extOutVal_) << "external output value is necessary"; - output_.value = std::dynamic_pointer_cast(extOutVal_); - CHECK(inVal_ && outVal_) << "internal memories are necessary"; - if (cvtInVal_) { - pipelineFwd_.insert(pipelineFwd_.begin(), *cvtInVal_); - } - if (cvtOutVal_) { - pipelineFwd_.push_back(*cvtOutVal_); - } + pipelineFwd_.clear(); + inVals_.resize(inputLayers_.size(), nullptr); + extInVals_.resize(inputLayers_.size(), nullptr); + cvtInVals_.resize(inputLayers_.size(), nullptr); + resetFwd(pipelineFwd_, inVals_, outVal_); + prepareValueConversions(pipelineFwd_); convertWeightsFromPaddle(); - printSizeInfo(); printValueFormat(); needResetBwd_ = true; } @@ -80,8 +69,8 @@ void MKLDNNLayer::forward(PassType passType) { if (inputLayers_[0]->getType() == "data" && inputLayers_.size() == 1) { // Update input value data when input layer is "data" type, // since the input value data address might be changed. - CHECK(extInVal_); - extInVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); + CHECK(extInVals_[0]); + extInVals_[0]->setData(getInputValue(0, CPU_DEVICE)->getData()); } if (!outputOnlyMKLDNN_) { @@ -99,22 +88,13 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) { if (needResetBwd_) { VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward"; pipelineBwd_.clear(); + inGrads_.resize(inputLayers_.size(), nullptr); + extInGrads_.resize(inputLayers_.size(), nullptr); + cvtInGrads_.resize(inputLayers_.size(), nullptr); pipelineMergeGrad_.clear(); mergeGrad_ = nullptr; - resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_); - // external output grad is not necessary - // since output may be mkldnn internal buffer or merge them directly. - CHECK(outGrad_) << "internal output grad is necessary"; - if (extOutGrad_) { - CHECK_EQ(extOutGrad_->getData(), output_.grad->getData()) - << "the external buffer should share the same data with output_.grad"; - } - if (cvtOutGrad_) { - pipelineBwd_.insert(pipelineBwd_.begin(), *cvtOutGrad_); - } - if (cvtInGrad_) { - pipelineBwd_.push_back(*cvtInGrad_); - } + resetBwd(pipelineBwd_, inGrads_, outGrad_); + prepareGradConversions(pipelineBwd_); printGradFormat(); needResetBwd_ = false; } @@ -141,8 +121,8 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) { void MKLDNNLayer::reshapeInput(int& batchsize, int& height, int& width, - size_t inputIdx) { - const Argument& input = inputLayers_[inputIdx]->getOutput(); + size_t idx) { + const Argument& input = inputLayers_[idx]->getOutput(); batchsize = input.getBatchSize(); int h = input.getFrameHeight(); int w = input.getFrameWidth(); @@ -176,27 +156,30 @@ void MKLDNNLayer::resetWithMatrix(MKLDNNMatrixPtr& dnn, void MKLDNNLayer::resetInValue( MKLDNNMatrixPtr& in, const std::shared_ptr& intPD, - size_t inputIdx) { - cvtInVal_ = nullptr; - extInVal_ = nullptr; + size_t idx, + int inputChannel) { + cvtInVals_[idx] = nullptr; + extInVals_[idx] = nullptr; in = nullptr; - CHECK_GT(bs_ * ic_ * ih_ * iw_, 0); + inputChannel = inputChannel == 0 ? ic_ : inputChannel; + CHECK_GT(bs_ * inputChannel * ih_ * iw_, 0); auto extPD = MKLDNNMatrix::createPrimitiveDesc( - {bs_, ic_, ih_, iw_}, format::nchw, engine_); - const MatrixPtr& inMat = inputLayers_[inputIdx]->getOutputValue(); - extInVal_ = std::dynamic_pointer_cast(inMat); - CHECK_EQ(inputIsOnlyMKLDNN(), extInVal_ != nullptr); - if (extInVal_ == nullptr || extInVal_->getFormat() == format::nc) { - extInVal_ = MKLDNNMatrix::create(extPD, inMat); + {bs_, inputChannel, ih_, iw_}, format::nchw, engine_); + const MatrixPtr& inMat = inputLayers_[idx]->getOutputValue(); + extInVals_[idx] = std::dynamic_pointer_cast(inMat); + CHECK_EQ(inputIsOnlyMKLDNN(), extInVals_[idx] != nullptr); + if (extInVals_[idx] == nullptr || + extInVals_[idx]->getFormat() == format::nc) { + extInVals_[idx] = MKLDNNMatrix::create(extPD, inMat); } - in = extInVal_; + in = extInVals_[idx]; if (nullptr == intPD || in->getPrimitiveDesc() == *intPD) { return; } // need create reorder in = MKLDNNMatrix::create(*intPD); - cvtInVal_ = MKLDNNMatrix::createReorder(extInVal_, in); - CHECK(cvtInVal_) << "should not be emptry"; + cvtInVals_[idx] = MKLDNNMatrix::createReorder(extInVals_[idx], in); + CHECK(cvtInVals_[idx]) << "should not be emptry"; } void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out, @@ -218,11 +201,11 @@ void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out, void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in, memory::primitive_desc intPD, - size_t inputIdx) { - cvtInGrad_ = nullptr; - extInGrad_ = nullptr; + size_t idx) { + cvtInGrads_[idx] = nullptr; + extInGrads_[idx] = nullptr; in = nullptr; - LayerPtr& input = inputLayers_[inputIdx]; + LayerPtr& input = inputLayers_[idx]; if (input->getOutputGrad() == nullptr) { // no need input grad return; @@ -237,23 +220,25 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in, in = MKLDNNMatrix::create(intPD, inMat); Argument& arg = input->getOutput(this->getName()); arg.grad = std::dynamic_pointer_cast(in); - CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD); + CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD); if (inputIsOnlyMKLDNN()) { return; } - extInGrad_ = in; - if (isPaddleFormat(extInGrad_->getFormat())) { + extInGrads_[idx] = in; + if (isPaddleFormat(extInGrads_[idx]->getFormat())) { return; } // need create reorder - CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat())) + CHECK(extInVals_[idx] != nullptr && + isPaddleFormat(extInVals_[idx]->getFormat())) << "should have external input value and the format must be nchw(nc)"; - extInGrad_ = MKLDNNMatrix::create(extInVal_->getPrimitiveDesc(), inMat); - CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD); + extInGrads_[idx] = + MKLDNNMatrix::create(extInVals_[idx]->getPrimitiveDesc(), inMat); + CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD); in = MKLDNNMatrix::create(intPD); - cvtInGrad_ = MKLDNNMatrix::createReorder(in, extInGrad_); - CHECK(cvtInGrad_); + cvtInGrads_[idx] = MKLDNNMatrix::createReorder(in, extInGrads_[idx]); + CHECK(cvtInGrads_[idx]); } void MKLDNNLayer::resetOutGrad(MKLDNNMatrixPtr& out, diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 4c42df1bee75fa7b28c2001c30797cc0df7c5554..907927f984f1a7cd4a72038515569251df48d56f 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -34,15 +34,16 @@ typedef std::shared_ptr MKLDNNLayerPtr; */ class MKLDNNLayer : public Layer { protected: - // input value element count - size_t inputElemenCnt_; // batch size int bs_; + // they sizes are always from the first input layer // input image channel, height and width int ic_, ih_, iw_; // output image channel, height and width int oc_, oh_, ow_; + // the condition that forward need be reset + size_t condition_; // backward also need reset after reset forward handle bool needResetBwd_; @@ -67,18 +68,18 @@ protected: * When all layers are mkldnn layers, they could save internal data. */ // below MKLDNNMatrix buffers are all internal buffers - MKLDNNMatrixPtr inVal_; - MKLDNNMatrixPtr inGrad_; + std::vector inVals_; + std::vector inGrads_; MKLDNNMatrixPtr outVal_; MKLDNNMatrixPtr outGrad_; // below are external value and grad - MKLDNNMatrixPtr extInVal_; - MKLDNNMatrixPtr extInGrad_; + std::vector extInVals_; + std::vector extInGrads_; MKLDNNMatrixPtr extOutVal_; MKLDNNMatrixPtr extOutGrad_; // convert handle between external and internal buffers - std::shared_ptr cvtInVal_; - std::shared_ptr cvtInGrad_; + std::vector> cvtInVals_; + std::vector> cvtInGrads_; std::shared_ptr cvtOutVal_; std::shared_ptr cvtOutGrad_; @@ -102,14 +103,7 @@ protected: public: explicit MKLDNNLayer(const LayerConfig& config) : Layer(config), - inputElemenCnt_(0), - bs_(0), - ic_(0), - ih_(0), - iw_(0), - oc_(0), - oh_(0), - ow_(0), + condition_(0), needResetBwd_(true), outputOnlyMKLDNN_(false), engine_(mkldnn::engine::cpu, 0), @@ -125,31 +119,28 @@ public: virtual void backward(const UpdateCallback& callback); /** - * reshape the input image sizes - * and reset output image and buffer size - * output channel can not be changed + * reshape the input and output channels and image sizes + * and reset output buffer size */ virtual void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) = 0; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) = 0; /** * reset the mkldnn forward primitve and memories * only would be called when input size changes + * weight and bias buffers should be coverd by child class itself */ virtual void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) = 0; /** * reset the mkldnn backward primitve and memories * only would be called when needed + * weight and bias buffers should be coverd by child class itself */ virtual void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) = 0; /** @@ -175,13 +166,19 @@ public: void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); } protected: + /** + * Some layers may have different condition to reset the forward. + * The function returns the condition that do not need reset forward. + */ + inline virtual size_t keepCondition() { + // reset when the first input element size changed, not only the batchsize + return inputLayers_[0]->getOutputValue()->getElementCnt(); + } + /** * reshape the input image sizes and input batchsize */ - void reshapeInput(int& batchsize, - int& height, - int& width, - size_t inputIdx = 0); + void reshapeInput(int& batchsize, int& height, int& width, size_t idx = 0); /** * reshape output image sizes @@ -199,11 +196,13 @@ protected: /** * reset input value from input MKLDNNMatrix and internal primitive desc. * reset both internal and external buffer and create reorder if necessary. + * input channel may be different in concat. */ void resetInValue( MKLDNNMatrixPtr& in, const std::shared_ptr& intPD = nullptr, - size_t inputIdx = 0); + size_t idx = 0, + int inputChannel = 0); /** * reset output value from internal primitive desc. @@ -218,7 +217,7 @@ protected: */ void resetInGrad(MKLDNNMatrixPtr& in, mkldnn::memory::primitive_desc intPD, - size_t inputIdx = 0); + size_t idx = 0); /** * reset output grad from internal primitive desc. @@ -296,17 +295,19 @@ protected: * print the mkldnn memory format of value */ virtual void printValueFormat() { - if (extInVal_) { - VLOG(MKLDNN_FMTS) << extInVal_->getFormat() << " >>> "; - } - if (inVal_) { - VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>>"; + for (size_t i = 0; i < inVals_.size(); ++i) { + if (!inVals_[i]) { + continue; + } + VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() + << ": " << (extInVals_[i] ? extInVals_[i]->getFormat() + : inVals_[i]->getFormat()) + << " >>> " << inVals_[i]->getFormat() << " >>>"; } if (outVal_) { - VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "; - } - if (extOutVal_) { - VLOG(MKLDNN_FMTS) << extOutVal_->getFormat(); + VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> " + << (extOutVal_ ? extOutVal_->getFormat() + : outVal_->getFormat()); } if (wgtVal_) { VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat(); @@ -320,17 +321,19 @@ protected: * print the mkldnn memory format of grad */ virtual void printGradFormat() { - if (extOutGrad_) { - VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat(); - } if (outGrad_) { - VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "; + VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< " + << (extOutGrad_ ? extOutGrad_->getFormat() + : outGrad_->getFormat()); } - if (inGrad_) { - VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<<"; - } - if (extInGrad_) { - VLOG(MKLDNN_FMTS) << extInGrad_->getFormat() << " <<< "; + for (size_t i = 0; i < inGrads_.size(); ++i) { + if (!inGrads_[i]) { + continue; + } + VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() + << ": " << (extInGrads_[i] ? extInGrads_[i]->getFormat() + : inGrads_[i]->getFormat()) + << " <<< " << inGrads_[i]->getFormat() << " <<<"; } if (wgtGrad_) { VLOG(MKLDNN_FMTS) << "Weight grad format: " << wgtGrad_->getFormat(); @@ -437,6 +440,41 @@ private: outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; } } + + void prepareValueConversions(std::vector& pipeline) { + // MKLDNNLayer output value should be MKLDNNMatrix + // so external output value is necessary. + // Then external input value is not necessary, + // since input may be mkldnn internal buffer. + CHECK(extOutVal_) << "external output value is necessary"; + output_.value = std::dynamic_pointer_cast(extOutVal_); + CHECK(inVals_[0] && outVal_) << "internal memories are necessary"; + for (size_t i = 0; i < cvtInVals_.size(); ++i) { + if (cvtInVals_[i]) { + pipeline.insert(pipeline.begin(), *cvtInVals_[i]); + } + } + if (cvtOutVal_) { + pipeline.push_back(*cvtOutVal_); + } + } + void prepareGradConversions(std::vector& pipeline) { + // external output grad is not necessary + // since output may be mkldnn internal buffer or merge them directly. + CHECK(outGrad_) << "internal output grad is necessary"; + if (extOutGrad_) { + CHECK_EQ(extOutGrad_->getData(), output_.grad->getData()) + << "the external buffer should share the same data with output_.grad"; + } + if (cvtOutGrad_) { + pipeline.insert(pipeline.begin(), *cvtOutGrad_); + } + for (size_t i = 0; i < cvtInGrads_.size(); ++i) { + if (cvtInGrads_[i]) { + pipeline.push_back(*cvtInGrads_[i]); + } + } + } }; } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp index a18c455beab96ef25b5545281bae4d48cec98d9e..a8252593c8fbb8013ab909e74a057850ba54bcaa 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp +++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp @@ -58,10 +58,11 @@ bool MKLDNNPoolLayer::init(const LayerMap& layerMap, } void MKLDNNPoolLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); // ic_ and oc can not be changed - CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic) + CHECK_EQ((size_t)ic, + inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw) << "Input channel can not be changed"; // cal output sizes @@ -74,29 +75,25 @@ void MKLDNNPoolLayer::reshape( } void MKLDNNPoolLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetFwdBuffers(in, out); + resetFwdBuffers(inputs[0], out); - resetFwdPD(fwdPD_, in, out); + resetFwdPD(fwdPD_, inputs[0], out); - resetFwdPipeline(pipeline, fwdPD_, in, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], out); } void MKLDNNPoolLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr pd; - resetBwdBuffers(in, out); + resetBwdBuffers(inputs[0], out); - resetBwdPD(pd, in, out); + resetBwdPD(pd, inputs[0], out); - resetBwdPipeline(pipeline, pd, in, out); + resetBwdPipeline(pipeline, pd, inputs[0], out); } void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, @@ -151,9 +148,9 @@ void MKLDNNPoolLayer::resetFwdPipeline( void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out) { - CHECK(inVal_ && outVal_); + CHECK(inVals_[0] && outVal_); resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVal_->getPrimitiveDesc()); + resetInGrad(in, inVals_[0]->getPrimitiveDesc()); } void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr& pd, diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.h b/paddle/gserver/layers/MKLDNNPoolLayer.h index c5ec87828bfb28b4502b4ec6b47287089c514204..dad60156f0ef7caa059ff6c70d1040e7e34c938f 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.h +++ b/paddle/gserver/layers/MKLDNNPoolLayer.h @@ -53,18 +53,14 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void printSizeInfo() override { @@ -75,11 +71,6 @@ public: } protected: - /** - * Forward functions: reset buffers(input, output), - * reset primitive descriptor, - * reset pipeline. - */ void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); void resetFwdPD(std::shared_ptr& pd, MKLDNNMatrixPtr in, @@ -88,12 +79,6 @@ protected: std::shared_ptr& pd, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(input, output), - * reset primitive descriptor, - * reset pipeline. - */ void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); void resetBwdPD(std::shared_ptr& pd, MKLDNNMatrixPtr& in,