diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp
index 1ab3032316cc9fd2627bc3ed78e4ad7720b969e3..22c5fa8b7667b28b0305006c6eba0ae3af307bb4 100644
--- a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp
@@ -57,16 +57,15 @@ void MKLDNNAddtoLayer::reshape(
 }
 
 void MKLDNNAddtoLayer::resetFwd(std::vector<primitive>& pipeline,
-                                MKLDNNMatrixPtr& in,
+                                std::vector<MKLDNNMatrixPtr>& inputs,
                                 MKLDNNMatrixPtr& out) {
-  resetFwdBuffers(inVals_, biasVal_, out);
-  in = inVals_[0];
+  resetFwdBuffers(inputs, biasVal_, out);
 
   std::shared_ptr<sum::primitive_desc> fwdPD;
   std::shared_ptr<sum::primitive_desc> biasPD;
-  resetFwdPD(fwdPD, biasPD, inVals_, biasVal_, out);
+  resetFwdPD(fwdPD, biasPD, inputs, biasVal_, out);
 
-  resetFwdPipeline(pipeline, fwdPD, biasPD, inVals_, biasVal_, out);
+  resetFwdPipeline(pipeline, fwdPD, biasPD, inputs, biasVal_, out);
 }
 
 void MKLDNNAddtoLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -206,7 +205,7 @@ void MKLDNNAddtoLayer::resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
 
   inputs.resize(inputLayers_.size());
   for (size_t i = 0; i < inputs.size(); i++) {
-    resetInGrad(inputs[i], inVal_->getPrimitiveDesc(), i);
+    resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i);
     CHECK_PRIMITIVE_DESC_EQ(inputs[i], out->getPrimitiveDesc());
   }
 
diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.h b/paddle/gserver/layers/MKLDNNAddtoLayer.h
index 1406496a7aebbfc334237809da93dcde25913561..6ad33950b1b6161203c6b8ef05fff472301415a6 100644
--- a/paddle/gserver/layers/MKLDNNAddtoLayer.h
+++ b/paddle/gserver/layers/MKLDNNAddtoLayer.h
@@ -26,7 +26,6 @@ namespace paddle {
  */
 class MKLDNNAddtoLayer : public MKLDNNLayer {
 protected:
-  std::vector<MKLDNNMatrixPtr> inVals_;
   std::vector<MKLDNNMatrixPtr> inGrads_;
 
   // layer size == ic * ih * iw == oc * oh *ow, and can not be changed
@@ -53,7 +52,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
@@ -62,18 +61,6 @@ public:
 
   void updateWeights(const UpdateCallback& callback) override;
 
-  void printValueFormat() override {
-    for (size_t i = 0; i < inVals_.size(); ++i) {
-      VLOG(MKLDNN_FMTS) << i << " input: " << inVals_[i]->getFormat() << " >>>";
-    }
-    if (outVal_) {
-      VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
-    }
-    if (extOutVal_) {
-      VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
-    }
-  }
-
   void printGradFormat() override {
     if (extOutGrad_) {
       VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
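The addto changes above set the pattern for the whole patch: resetFwd now receives the full vector of input buffers, so multi-input layers stop keeping a private inVals_ member (and stop copying element 0 back into the scalar in argument). A minimal sketch of the signature change, using hypothetical stand-in types rather than the real Paddle headers:

#include <memory>
#include <vector>

struct Primitive {};                     // stand-in for mkldnn::primitive
using MatrixPtr = std::shared_ptr<int>;  // stand-in for MKLDNNMatrixPtr

// Before: exactly one input passed by reference; layers with several
// inputs kept their own vector member and wrote element 0 back to `in`.
void resetFwdOld(std::vector<Primitive>& pipeline,
                 MatrixPtr& in,
                 MatrixPtr& out) {}

// After: the base class owns the vector and passes every input at once;
// single-input layers simply read inputs[0].
void resetFwdNew(std::vector<Primitive>& pipeline,
                 std::vector<MatrixPtr>& inputs,
                 MatrixPtr& out) {}

The same mechanical substitution (inputs[0] for the old scalar) appears in every single-input layer below.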
diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
index 96e5a99f3377e0016244fea7e1eb0943e95be80b..8c8101adc4faa371b817309ab3bea2b01ff1cafe 100644
--- a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
@@ -128,7 +128,7 @@ void MKLDNNBatchNormLayer::reshape(
 }
 
 void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,
-                                    MKLDNNMatrixPtr& in,
+                                    std::vector<MKLDNNMatrixPtr>& inputs,
                                     MKLDNNMatrixPtr& out) {
   // In training phase, it will always calculate mean and var,
   // so useGlobalStats must be false.
@@ -138,11 +138,11 @@ void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,
     useGlobalStats_ = false;
   }
 
-  resetFwdBuffers(in, wgtVal_, out);
+  resetFwdBuffers(inputs[0], wgtVal_, out);
 
-  resetFwdPD(fwdPD_, in, wgtVal_, out);
+  resetFwdPD(fwdPD_, inputs[0], wgtVal_, out);
 
-  resetFwdPipeline(pipeline, fwdPD_, in, wgtVal_, out);
+  resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, out);
 }
 
 void MKLDNNBatchNormLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -256,9 +256,9 @@ void MKLDNNBatchNormLayer::resetFwdPipeline(
 void MKLDNNBatchNormLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                            MKLDNNMatrixPtr& wgt,
                                            MKLDNNMatrixPtr& out) {
-  CHECK(inVal_ && outVal_);
+  CHECK(inVals_[0] && outVal_);
   resetOutGrad(out, outVal_->getPrimitiveDesc());
-  resetInGrad(in, inVal_->getPrimitiveDesc());
+  resetInGrad(in, inVals_[0]->getPrimitiveDesc());
   if (gradScaleShift_) {
     CHECK(wgtVal_);
     resetWithMatrix(wgt, gradScaleShift_, wgtVal_->getPrimitiveDesc());
@@ -293,11 +293,12 @@ void MKLDNNBatchNormLayer::resetBwdPipeline(
   if (pd == nullptr) {
     return;
   }
-  CHECK(inVal_);
+  CHECK(inVals_[0]);
   bwdData_.reset(
       wgt && wgtVal_
-          ? new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *wgtVal_, *in, *wgt)
-          : new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *in));
+          ? new bn_bwd(
+                *pd, *inVals_[0], *mean_, *var_, *out, *wgtVal_, *in, *wgt)
+          : new bn_bwd(*pd, *inVals_[0], *mean_, *var_, *out, *in));
   pipeline.push_back(*bwdData_);
 }
 
diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.h b/paddle/gserver/layers/MKLDNNBatchNormLayer.h
index a9a425ee33bc2800fef229bb9fd814fd86070b0d..be6385635660e8245a3177b5d4cf9bbef97aa32c 100644
--- a/paddle/gserver/layers/MKLDNNBatchNormLayer.h
+++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.h
@@ -76,7 +76,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.cpp b/paddle/gserver/layers/MKLDNNConcatLayer.cpp
index a3106b0c06cca4db3e46899fb347c7b8cb9639ae..aa8ca898c8b033aa5fd362e3a637ed37e0e3db13 100644
--- a/paddle/gserver/layers/MKLDNNConcatLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConcatLayer.cpp
@@ -59,15 +59,14 @@ void MKLDNNConcatLayer::reshape(
 }
 
 void MKLDNNConcatLayer::resetFwd(std::vector<primitive>& pipeline,
-                                 MKLDNNMatrixPtr& in,
+                                 std::vector<MKLDNNMatrixPtr>& inputs,
                                  MKLDNNMatrixPtr& out) {
-  resetFwdBuffers(inVals_, out);
-  in = inVals_[0];
+  resetFwdBuffers(inputs, out);
 
   std::shared_ptr<concat::primitive_desc> fwdPD;
-  resetFwdPD(fwdPD, inVals_, out);
+  resetFwdPD(fwdPD, inputs, out);
 
-  resetFwdPipeline(pipeline, fwdPD, inVals_, out);
+  resetFwdPipeline(pipeline, fwdPD, inputs, out);
 }
 
 void MKLDNNConcatLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -157,14 +156,9 @@ void MKLDNNConcatLayer::resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
   inputs.resize(inputLayers_.size());
   for (size_t i = 0; i < inputs.size(); i++) {
     CHECK(inVals_[i]);
-    // resetInGrad will use inVal_
-    // TODO(TJ): change move inVals_ to MKLDNNLayer ans remove inVal_
-    inVal_ = inVals_[i];
     resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i);
     CHECK_PRIMITIVE_DESC_EQ(inputs[i], inVals_[i]->getPrimitiveDesc());
   }
-  // change back, inVal_ always save the input 0
-  inVal_ = inVals_[0];
 }
 
 void MKLDNNConcatLayer::resetBwdPipeline(
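The concat backward hunk above is the clearest motivation for the refactor: the deleted lines had to temporarily re-point the scalar inVal_ at each input so that the shared resetInGrad helper would validate against the right buffer, then restore it afterwards. Passing the index into the helper removes that mutable-state dance. A compilable sketch of the before and after shapes, again with stand-in types:

#include <cassert>
#include <cstddef>
#include <vector>

struct Desc { int fmt = 0; };

struct LayerSketch {
  Desc inVal;                // old: scalar member the helper peeked at
  std::vector<Desc> inVals;  // new: per-input state in the base class

  // Old shape: callers had to assign inVal before each call.
  void resetInGradOld(const Desc& pd) { assert(pd.fmt == inVal.fmt); }

  // New shape: the index travels with the call, so no member juggling.
  void resetInGradNew(const Desc& pd, std::size_t idx) {
    assert(pd.fmt == inVals[idx].fmt);
  }
};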
diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.h b/paddle/gserver/layers/MKLDNNConcatLayer.h
index 2750a6ed2940d1ddb3191b184bb5fc12b25c81cd..14863aed3c25a767727d4b480896c76efc9a67dd 100644
--- a/paddle/gserver/layers/MKLDNNConcatLayer.h
+++ b/paddle/gserver/layers/MKLDNNConcatLayer.h
@@ -26,7 +26,6 @@ namespace paddle {
  */
 class MKLDNNConcatLayer : public MKLDNNLayer {
 protected:
-  std::vector<MKLDNNMatrixPtr> inVals_;
   std::vector<MKLDNNMatrixPtr> inGrads_;
   std::vector<std::shared_ptr<mkldnn::primitive>> bwds_;
   // input channel numbers
@@ -50,7 +49,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
@@ -68,19 +67,6 @@ public:
            << ", " << ow_;
   }
 
-  void printValueFormat() override {
-    for (size_t i = 0; i < inVals_.size(); ++i) {
-      VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
-                        << ": " << inVals_[i]->getFormat() << " >>>";
-    }
-    if (outVal_) {
-      VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
-    }
-    if (extOutVal_) {
-      VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
-    }
-  }
-
   void printGradFormat() override {
     if (extOutGrad_) {
       VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp
index 5d89f230d28ab114fc3676951cbd54670bdd857c..0bacd6a9d3685d96acb9e111f3fc9067d711fc4d 100644
--- a/paddle/gserver/layers/MKLDNNConvLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp
@@ -105,13 +105,13 @@ void MKLDNNConvLayer::reshape(
 }
 
 void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
-                               MKLDNNMatrixPtr& in,
+                               std::vector<MKLDNNMatrixPtr>& inputs,
                                MKLDNNMatrixPtr& out) {
   resetFwdPD(fwdPD_);
 
-  resetFwdBuffers(fwdPD_, in, wgtVal_, biasVal_, out);
+  resetFwdBuffers(fwdPD_, inputs[0], wgtVal_, biasVal_, out);
 
-  resetFwdPipeline(pipeline, fwdPD_, in, wgtVal_, biasVal_, out);
+  resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out);
 }
 
 void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -232,14 +232,14 @@ void MKLDNNConvLayer::resetBwdWgtPD(
   loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
 
   // create backward weight using input, output and weight value memory desc
-  CHECK(inVal_) << "Should have internal input value";
+  CHECK(inVals_[0]) << "Should have internal input value";
   CHECK(outVal_) << "Should have internal output value";
   CHECK(wgtVal_) << "Should have weight value";
   algorithm algo = algorithm::convolution_direct;
   padding_kind padKind = padding_kind::zero;
   auto bwdWgtDesc = biasVal_ != nullptr
                         ? conv_bwdWgt::desc(algo,
-                                            inVal_->getMemoryDesc(),
+                                            inVals_[0]->getMemoryDesc(),
                                             wgtVal_->getMemoryDesc(),
                                             biasVal_->getMemoryDesc(),
                                             outVal_->getMemoryDesc(),
@@ -248,7 +248,7 @@ void MKLDNNConvLayer::resetBwdWgtPD(
                                             padR,
                                             padKind)
                         : conv_bwdWgt::desc(algo,
-                                            inVal_->getMemoryDesc(),
+                                            inVals_[0]->getMemoryDesc(),
                                             wgtVal_->getMemoryDesc(),
                                             outVal_->getMemoryDesc(),
                                             strides,
@@ -256,7 +256,7 @@ void MKLDNNConvLayer::resetBwdWgtPD(
                                             padR,
                                             padKind);
   pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_));
-  CHECK_PRIMITIVE_DESC_EQ(inVal_, pd->src_primitive_desc());
+  CHECK_PRIMITIVE_DESC_EQ(inVals_[0], pd->src_primitive_desc());
   CHECK_PRIMITIVE_DESC_EQ(
       outVal_,
       pd->diff_dst_primitive_desc(),
@@ -276,12 +276,12 @@ void MKLDNNConvLayer::resetBwdDataPD(
   memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
   loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
 
-  CHECK(inVal_) << "Should have internal input value";
+  CHECK(inVals_[0]) << "Should have internal input value";
   CHECK(outVal_) << "Should have internal output value";
   // create backward data using input and output value memory desc
   // but using weight memory desc with any format
   auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct,
-                                        inVal_->getMemoryDesc(),
+                                        inVals_[0]->getMemoryDesc(),
                                         MKLDNNMatrix::createMemoryDesc(wgtDims),
                                         outVal_->getMemoryDesc(),
                                         strides,
@@ -290,7 +290,7 @@ void MKLDNNConvLayer::resetBwdDataPD(
                                         padding_kind::zero);
   pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_));
   CHECK_PRIMITIVE_DESC_EQ(
-      inVal_,
+      inVals_[0],
       pd->diff_src_primitive_desc(),
       "primitive desc of in value and grad should be equal");
   CHECK_PRIMITIVE_DESC_EQ(
@@ -342,12 +342,12 @@ void MKLDNNConvLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_);
+  CHECK(inVals_[0]);
   // add bwdWgt handle
   if (bias) {
-    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias));
+    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt, *bias));
   } else {
-    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt));
+    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt));
   }
   pipeline.push_back(*bwdWgt_);
 
diff --git a/paddle/gserver/layers/MKLDNNConvLayer.h b/paddle/gserver/layers/MKLDNNConvLayer.h
index 900f42af847a222a81ddd5347aae438bba0860fe..ff416e4f31da2acb087dae34924c19ca1071586b 100644
--- a/paddle/gserver/layers/MKLDNNConvLayer.h
+++ b/paddle/gserver/layers/MKLDNNConvLayer.h
@@ -72,7 +72,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp
index ccf11e04a37e9e0a26ebdb96db5c8387930e1423..9cb1580672c18f55ffb2190263904947c1996529 100644
--- a/paddle/gserver/layers/MKLDNNFcLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp
@@ -87,13 +87,13 @@ void MKLDNNFcLayer::reshape(
 }
 
 void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
-                             MKLDNNMatrixPtr& in,
+                             std::vector<MKLDNNMatrixPtr>& inputs,
                              MKLDNNMatrixPtr& out) {
-  resetFwdBuffers(in, wgtVal_, biasVal_, out);
+  resetFwdBuffers(inputs[0], wgtVal_, biasVal_, out);
 
-  resetFwdPD(fwdPD_, in, wgtVal_, biasVal_, out);
+  resetFwdPD(fwdPD_, inputs[0], wgtVal_, biasVal_, out);
 
-  resetFwdPipeline(pipeline, fwdPD_, in, wgtVal_, biasVal_, out);
+  resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out);
 }
 
 void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -189,9 +189,9 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                     MKLDNNMatrixPtr& wgt,
                                     MKLDNNMatrixPtr& bias,
                                     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_ && outVal_);
+  CHECK(inVals_[0] && outVal_);
   resetOutGrad(out, outVal_->getPrimitiveDesc());
-  resetInGrad(in, inVal_->getPrimitiveDesc());
+  resetInGrad(in, inVals_[0]->getPrimitiveDesc());
 
   CHECK(wgtVal_);
   resetWithMatrix(wgt, weight_->getWGrad(), wgtVal_->getPrimitiveDesc());
@@ -208,14 +208,15 @@ void MKLDNNFcLayer::resetBwdWgtPD(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_);
-  fc_bwdWgt::desc bwdWgtDesc = bias ? fc_bwdWgt::desc(inVal_->getMemoryDesc(),
-                                                      wgt->getMemoryDesc(),
-                                                      bias->getMemoryDesc(),
-                                                      out->getMemoryDesc())
-                                    : fc_bwdWgt::desc(inVal_->getMemoryDesc(),
-                                                      wgt->getMemoryDesc(),
-                                                      out->getMemoryDesc());
+  CHECK(inVals_[0]);
+  fc_bwdWgt::desc bwdWgtDesc =
+      bias ? fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(),
+                             wgt->getMemoryDesc(),
+                             bias->getMemoryDesc(),
+                             out->getMemoryDesc())
+           : fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(),
+                             wgt->getMemoryDesc(),
+                             out->getMemoryDesc());
   pd.reset(new fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_));
 }
 
@@ -241,11 +242,11 @@ void MKLDNNFcLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_);
+  CHECK(inVals_[0]);
   if (bias) {
-    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias));
+    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt, *bias));
   } else {
-    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt));
+    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt));
   }
   pipeline.push_back(*bwdWgt_);
 
diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h
index a9c916ea13299306b024adb8a3ad4f7cc5ac3c46..a7ea4cd4311435f10d799b9389c390c3ed4a7f3c 100644
--- a/paddle/gserver/layers/MKLDNNFcLayer.h
+++ b/paddle/gserver/layers/MKLDNNFcLayer.h
@@ -55,7 +55,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
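The base-class diff below is where the per-input state actually lands. forward() now sizes inVals_, extInVals_, and cvtInVals_ to inputLayers_.size() before invoking the virtual resetFwd, and the conversion bookkeeping that used to be inlined moves into a prepareValueConversions() helper. A compilable sketch of that splice order, again with stand-in types:

#include <memory>
#include <vector>

struct Primitive { int id = 0; };
using ReorderPtr = std::shared_ptr<Primitive>;

// Every external-to-internal input reorder is spliced onto the front of
// the pipeline so it runs before the layer's own primitives; the output
// reorder is appended so the internal-to-external conversion runs last.
void prepareValueConversions(std::vector<Primitive>& pipeline,
                             const std::vector<ReorderPtr>& cvtInVals,
                             const ReorderPtr& cvtOutVal) {
  for (const auto& cvt : cvtInVals) {
    if (cvt) pipeline.insert(pipeline.begin(), *cvt);
  }
  if (cvtOutVal) pipeline.push_back(*cvtOutVal);
}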
diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/gserver/layers/MKLDNNLayer.cpp
index 02170ea8160d7f7ed6208d3a5144399791878ffb..99350cd012c8c955b347875b98c2157a5661ffed 100644
--- a/paddle/gserver/layers/MKLDNNLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNLayer.cpp
@@ -53,25 +53,17 @@ void MKLDNNLayer::forward(PassType passType) {
     VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
     // reset when input total sizes changed, not only the batchsize
     inputElemenCnt_ = elemenCnt;
-    pipelineFwd_.clear();
     reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
     printSizeInfo();
-    // all cpu device output grad or value share output's
+    // the output_.value and output_.grad are shared with CPU device
    shareCPUDevice();
-    resetFwd(pipelineFwd_, inVal_, outVal_);
-    // MKLDNNLayer output value should be MKLDNNMatrix
-    // so external output value is necessary.
-    // Then external input value is not necessary,
-    // since input may be mkldnn internal buffer.
-    CHECK(extOutVal_) << "external output value is necessary";
-    output_.value = std::dynamic_pointer_cast<Matrix>(extOutVal_);
-    CHECK(inVal_ && outVal_) << "internal memories are necessary";
-    if (cvtInVal_) {
-      pipelineFwd_.insert(pipelineFwd_.begin(), *cvtInVal_);
-    }
-    if (cvtOutVal_) {
-      pipelineFwd_.push_back(*cvtOutVal_);
-    }
+
+    pipelineFwd_.clear();
+    inVals_.resize(inputLayers_.size(), nullptr);
+    extInVals_.resize(inputLayers_.size(), nullptr);
+    cvtInVals_.resize(inputLayers_.size(), nullptr);
+    resetFwd(pipelineFwd_, inVals_, outVal_);
+    prepareValueConversions(pipelineFwd_);
     convertWeightsFromPaddle();
     printValueFormat();
     needResetBwd_ = true;
@@ -80,8 +72,8 @@ void MKLDNNLayer::forward(PassType passType) {
   if (inputLayers_[0]->getType() == "data" && inputLayers_.size() == 1) {
     // Update input value data when input layer is "data" type,
     // since the input value data address might be changed.
-    CHECK(extInVal_);
-    extInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
+    CHECK(extInVals_[0]);
+    extInVals_[0]->setData(getInputValue(0, CPU_DEVICE)->getData());
   }
 
   if (!outputOnlyMKLDNN_) {
@@ -141,8 +133,8 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) {
 void MKLDNNLayer::reshapeInput(int& batchsize,
                                int& height,
                                int& width,
-                               size_t inputIdx) {
-  const Argument& input = inputLayers_[inputIdx]->getOutput();
+                               size_t idx) {
+  const Argument& input = inputLayers_[idx]->getOutput();
   batchsize = input.getBatchSize();
   int h = input.getFrameHeight();
   int w = input.getFrameWidth();
@@ -176,29 +168,30 @@ void MKLDNNLayer::resetWithMatrix(MKLDNNMatrixPtr& dnn,
 void MKLDNNLayer::resetInValue(
     MKLDNNMatrixPtr& in,
     const std::shared_ptr<memory::primitive_desc>& intPD,
-    size_t inputIdx,
+    size_t idx,
     int inputChannel) {
-  cvtInVal_ = nullptr;
-  extInVal_ = nullptr;
+  cvtInVals_[idx] = nullptr;
+  extInVals_[idx] = nullptr;
   in = nullptr;
   inputChannel = inputChannel == 0 ? ic_ : inputChannel;
   CHECK_GT(bs_ * inputChannel * ih_ * iw_, 0);
   auto extPD = MKLDNNMatrix::createPrimitiveDesc(
       {bs_, inputChannel, ih_, iw_}, format::nchw, engine_);
-  const MatrixPtr& inMat = inputLayers_[inputIdx]->getOutputValue();
-  extInVal_ = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
-  CHECK_EQ(inputIsOnlyMKLDNN(), extInVal_ != nullptr);
-  if (extInVal_ == nullptr || extInVal_->getFormat() == format::nc) {
-    extInVal_ = MKLDNNMatrix::create(extPD, inMat);
+  const MatrixPtr& inMat = inputLayers_[idx]->getOutputValue();
+  extInVals_[idx] = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
+  CHECK_EQ(inputIsOnlyMKLDNN(), extInVals_[idx] != nullptr);
+  if (extInVals_[idx] == nullptr ||
+      extInVals_[idx]->getFormat() == format::nc) {
+    extInVals_[idx] = MKLDNNMatrix::create(extPD, inMat);
   }
-  in = extInVal_;
+  in = extInVals_[idx];
   if (nullptr == intPD || in->getPrimitiveDesc() == *intPD) {
     return;
   }
   // need create reorder
   in = MKLDNNMatrix::create(*intPD);
-  cvtInVal_ = MKLDNNMatrix::createReorder(extInVal_, in);
-  CHECK(cvtInVal_) << "should not be emptry";
+  cvtInVals_[idx] = MKLDNNMatrix::createReorder(extInVals_[idx], in);
+  CHECK(cvtInVals_[idx]) << "should not be emptry";
 }
 
 void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out,
@@ -220,11 +213,11 @@ void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out,
 
 void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
                               memory::primitive_desc intPD,
-                              size_t inputIdx) {
+                              size_t idx) {
   cvtInGrad_ = nullptr;
   extInGrad_ = nullptr;
   in = nullptr;
-  LayerPtr& input = inputLayers_[inputIdx];
+  LayerPtr& input = inputLayers_[idx];
   if (input->getOutputGrad() == nullptr) {
     // no need input grad
     return;
@@ -239,7 +232,7 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
   in = MKLDNNMatrix::create(intPD, inMat);
   Argument& arg = input->getOutput(this->getName());
   arg.grad = std::dynamic_pointer_cast<Matrix>(in);
-  CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD);
+  CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD);
   if (inputIsOnlyMKLDNN()) {
     return;
   }
@@ -249,10 +242,11 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
     return;
   }
   // need create reorder
-  CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat()))
+  CHECK(extInVals_[idx] != nullptr &&
+        isPaddleFormat(extInVals_[idx]->getFormat()))
       << "should have external input value and the format must be nchw(nc)";
-  extInGrad_ = MKLDNNMatrix::create(extInVal_->getPrimitiveDesc(), inMat);
-  CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD);
+  extInGrad_ = MKLDNNMatrix::create(extInVals_[idx]->getPrimitiveDesc(), inMat);
+  CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD);
   in = MKLDNNMatrix::create(intPD);
   cvtInGrad_ = MKLDNNMatrix::createReorder(in, extInGrad_);
   CHECK(cvtInGrad_);
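Note that only the value side becomes per-input in this patch: resetInValue and resetInGrad key their lookups by idx, but the gradient buffers (inGrad_, extInGrad_, cvtInGrad_) stay scalar members that are reset for one index at a time and merely validated against inVals_[idx]. A sketch of the member layout after the header diff below, with stand-in types:

#include <memory>
#include <vector>

struct MKLDNNMatrixSketch {};
using MatrixPtr = std::shared_ptr<MKLDNNMatrixSketch>;
struct Reorder {};

struct LayerStateSketch {
  // value side: one slot per input layer
  std::vector<MatrixPtr> inVals, extInVals;
  std::vector<std::shared_ptr<Reorder>> cvtInVals;
  // grad side: still single slots, reset per index by the callers
  MatrixPtr inGrad, extInGrad;
  std::shared_ptr<Reorder> cvtInGrad;
};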
diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h
index 0e271908099b0d0e513233a7130f1b199281dfde..0ae4b8087f92afe44c9715552df5d5edfd39debf 100644
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -68,17 +68,17 @@ protected:
    * When all layers are mkldnn layers, they could save internal data.
    */
   // below MKLDNNMatrix buffers are all internal buffers
-  MKLDNNMatrixPtr inVal_;
+  std::vector<MKLDNNMatrixPtr> inVals_;
   MKLDNNMatrixPtr inGrad_;
   MKLDNNMatrixPtr outVal_;
   MKLDNNMatrixPtr outGrad_;
   // below are external value and grad
-  MKLDNNMatrixPtr extInVal_;
+  std::vector<MKLDNNMatrixPtr> extInVals_;
   MKLDNNMatrixPtr extInGrad_;
   MKLDNNMatrixPtr extOutVal_;
   MKLDNNMatrixPtr extOutGrad_;
   // convert handle between external and internal buffers
-  std::shared_ptr<mkldnn::reorder> cvtInVal_;
+  std::vector<std::shared_ptr<mkldnn::reorder>> cvtInVals_;
   std::shared_ptr<mkldnn::reorder> cvtInGrad_;
   std::shared_ptr<mkldnn::reorder> cvtOutVal_;
   std::shared_ptr<mkldnn::reorder> cvtOutGrad_;
@@ -138,7 +138,7 @@ public:
    * weight and bias buffers should be coverd by child class itself
    */
   virtual void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                        MKLDNNMatrixPtr& in,
+                        std::vector<MKLDNNMatrixPtr>& inputs,
                         MKLDNNMatrixPtr& out) = 0;
 
   /**
@@ -176,10 +176,7 @@ protected:
   /**
    * reshape the input image sizes and input batchsize
    */
-  void reshapeInput(int& batchsize,
-                    int& height,
-                    int& width,
-                    size_t inputIdx = 0);
+  void reshapeInput(int& batchsize, int& height, int& width, size_t idx = 0);
 
   /**
    * reshape output image sizes
@@ -202,7 +199,7 @@ protected:
   void resetInValue(
       MKLDNNMatrixPtr& in,
       const std::shared_ptr<mkldnn::memory::primitive_desc>& intPD = nullptr,
-      size_t inputIdx = 0,
+      size_t idx = 0,
       int inputChannel = 0);
 
   /**
@@ -218,7 +215,7 @@ protected:
    */
   void resetInGrad(MKLDNNMatrixPtr& in,
                    mkldnn::memory::primitive_desc intPD,
-                   size_t inputIdx = 0);
+                   size_t idx = 0);
 
   /**
    * reset output grad from internal primitive desc.
@@ -296,17 +293,19 @@ protected:
    * print the mkldnn memory format of value
    */
   virtual void printValueFormat() {
-    if (extInVal_) {
-      VLOG(MKLDNN_FMTS) << extInVal_->getFormat() << " >>> ";
-    }
-    if (inVal_) {
-      VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>>";
+    for (size_t i = 0; i < inVals_.size(); ++i) {
+      if (!inVals_[i]) {
+        continue;
+      }
+      VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
+                        << ": " << (extInVals_[i] ? extInVals_[i]->getFormat()
+                                                  : inVals_[i]->getFormat())
+                        << " >>> " << inVals_[i]->getFormat() << " >>>";
     }
     if (outVal_) {
-      VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
-    }
-    if (extOutVal_) {
-      VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
+      VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "
+                        << (extOutVal_ ? extOutVal_->getFormat()
+                                       : outVal_->getFormat());
     }
     if (wgtVal_) {
       VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat();
@@ -437,6 +436,24 @@ private:
       outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
     }
   }
+
+  void prepareValueConversions(std::vector<mkldnn::primitive>& pipeline) {
+    // MKLDNNLayer output value should be MKLDNNMatrix
+    // so external output value is necessary.
+    // Then external input value is not necessary,
+    // since input may be mkldnn internal buffer.
+    CHECK(extOutVal_) << "external output value is necessary";
+    output_.value = std::dynamic_pointer_cast<Matrix>(extOutVal_);
+    CHECK(inVals_[0] && outVal_) << "internal memories are necessary";
+    for (size_t i = 0; i < cvtInVals_.size(); ++i) {
+      if (cvtInVals_[i]) {
+        pipeline.insert(pipeline.begin(), *cvtInVals_[i]);
+      }
+    }
+    if (cvtOutVal_) {
+      pipeline.push_back(*cvtOutVal_);
+    }
+  }
 };
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp
index 79102aba00b062562c7ea899bc7408c4f6990a8d..21a04cd5ee020986e40ebe4f24897fd64a0d98ca 100644
--- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp
@@ -74,13 +74,13 @@ void MKLDNNPoolLayer::reshape(
 }
 
 void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
-                               MKLDNNMatrixPtr& in,
+                               std::vector<MKLDNNMatrixPtr>& inputs,
                                MKLDNNMatrixPtr& out) {
-  resetFwdBuffers(in, out);
+  resetFwdBuffers(inputs[0], out);
 
-  resetFwdPD(fwdPD_, in, out);
+  resetFwdPD(fwdPD_, inputs[0], out);
 
-  resetFwdPipeline(pipeline, fwdPD_, in, out);
+  resetFwdPipeline(pipeline, fwdPD_, inputs[0], out);
 }
 
 void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -147,9 +147,9 @@ void MKLDNNPoolLayer::resetFwdPipeline(
 
 void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                       MKLDNNMatrixPtr& out) {
-  CHECK(inVal_ && outVal_);
+  CHECK(inVals_[0] && outVal_);
   resetOutGrad(out, outVal_->getPrimitiveDesc());
-  resetInGrad(in, inVal_->getPrimitiveDesc());
+  resetInGrad(in, inVals_[0]->getPrimitiveDesc());
 }
 
 void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.h b/paddle/gserver/layers/MKLDNNPoolLayer.h
index 972419c5af0ac9e67621b7c661c1751d7d9932b8..242846cbcf7e3b76f7d47ecbf63e63073d6c9521 100644
--- a/paddle/gserver/layers/MKLDNNPoolLayer.h
+++ b/paddle/gserver/layers/MKLDNNPoolLayer.h
@@ -56,7 +56,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
 
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
 
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
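Finally, with the per-input buffers in the base class, printValueFormat can log every input generically as "external >>> internal", which is why the addto and concat overrides were deleted earlier in the patch. A compilable sketch of the consolidated logging, with stand-in types in place of MKLDNNMatrix and VLOG:

#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

struct Buf { std::string fmt; };

void printValueFormat(const std::vector<Buf*>& inVals,
                      const std::vector<Buf*>& extInVals) {
  for (std::size_t i = 0; i < inVals.size(); ++i) {
    if (!inVals[i]) continue;
    // fall back to the internal format when no external buffer exists
    const std::string& ext =
        extInVals[i] ? extInVals[i]->fmt : inVals[i]->fmt;
    std::printf("Input %zu: %s >>> %s\n", i, ext.c_str(),
                inVals[i]->fmt.c_str());
  }
}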