diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index a47967b3d3cdf488dc0e1222311bdf72c6cda902..f70343251ad4fbb99f9614618f6d1bff1174f15e 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -78,46 +78,52 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); } -void MKLDNNFcLayer::reshape() { - reshapeInput(); +void MKLDNNFcLayer::reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + reshapeInput(bs, ih, iw); CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); - ic_ = iLayerSize_ / (ih_ * iw_); - CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; - CHECK_EQ(size_t(oc_), getSize()); + ic = iLayerSize_ / (ih * iw); + CHECK_EQ(size_t(ic * ih * iw), iLayerSize_) << "not divisible"; + CHECK_EQ(size_t(oc), getSize()); - reshapeOutput(oh_, ow_); - resizeOutput(bs_, oc_); + reshapeOutput(oh, ow); + resizeOutput(bs, oc); printSizeInfo(); } -void MKLDNNFcLayer::resetFwd() { +void MKLDNNFcLayer::resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); bool hasBias = biases_ && biases_->getW(); - const MatrixPtr& wgt = weight_->getW(); - const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; - const MatrixPtr& out = output_.value; + const MatrixPtr& wgtVal = weight_->getW(); + const MatrixPtr& biasVal = hasBias ? biases_->getW() : nullptr; + const MatrixPtr& outVal = output_.value; if (inputIsOnlyMKLDNN()) { - const MatrixPtr& in = getInputValue(0); - inVal_ = std::dynamic_pointer_cast(in); - CHECK(inVal_) << "Input should be MKLDNNMatrix"; + const MatrixPtr& inVal = getInputValue(0); + in = std::dynamic_pointer_cast(inVal); + CHECK(in) << "Input should be MKLDNNMatrix"; } else { CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet"; - const MatrixPtr& in = getInputValue(0, CPU_DEVICE); - inVal_ = MKLDNNMatrix::create( - in, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_); + const MatrixPtr& inVal = getInputValue(0, CPU_DEVICE); + in = MKLDNNMatrix::create( + inVal, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_); } - inVal_->downSpatial(); - wgtVal_ = MKLDNNMatrix::create( - wgt, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_); - wgtVal_->downSpatial(); - biasVal_ = - hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; - outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); + in->downSpatial(); + wgt = MKLDNNMatrix::create( + wgtVal, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_); + wgt->downSpatial(); + bias = hasBias ? MKLDNNMatrix::create(biasVal, {oc_}, format::x, engine_) + : nullptr; + out = MKLDNNMatrix::create(outVal, {bs_, oc_}, format::nc, engine_); // change original output value to mkldnn output value - output_.value = std::dynamic_pointer_cast(outVal_); + output_.value = std::dynamic_pointer_cast(out); if (!outputIsOnlyMKLDNN()) { // fc cpu output value do not need create convert // just share point @@ -127,27 +133,31 @@ void MKLDNNFcLayer::resetFwd() { // create forward handle prop_kind pk = prop_kind::forward; fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk, - inVal_->getMemoryDesc(), - wgtVal_->getMemoryDesc(), - biasVal_->getMemoryDesc(), - outVal_->getMemoryDesc()) + in->getMemoryDesc(), + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) : fc_fwd::desc(pk, - inVal_->getMemoryDesc(), - wgtVal_->getMemoryDesc(), - outVal_->getMemoryDesc()); + in->getMemoryDesc(), + wgt->getMemoryDesc(), + out->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); if (hasBias) { - fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); + fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *bias, *out)); } else { - fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); + fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *out)); } printValueFormatFlow(); - pipelineFwd_.clear(); - pipelineFwd_.push_back(*fwd_); + pipeline.push_back(*fwd_); } -void MKLDNNFcLayer::resetBwd() { +void MKLDNNFcLayer::resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); if (!needResetBwd_) { return; } @@ -156,8 +166,8 @@ void MKLDNNFcLayer::resetBwd() { /// backward weight CHECK(inVal_) << "Should have input value"; - const MatrixPtr& wgt = weight_->getWGrad(); - const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; + const MatrixPtr& wgtGrad = weight_->getWGrad(); + const MatrixPtr& biasGrad = hasBias ? biases_->getWGrad() : nullptr; // TODO(TJ): merge outgrad int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; @@ -168,59 +178,56 @@ void MKLDNNFcLayer::resetBwd() { // for CPU device: // fc do not need to convert from cpu device since output is always nc format // only need create from cpu device - const MatrixPtr& out = getOutput(device).grad; - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); - wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPrimitiveDesc()); - biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPrimitiveDesc()) - : nullptr; + const MatrixPtr& outGrad = getOutput(device).grad; + out = MKLDNNMatrix::create(outGrad, outVal_->getPrimitiveDesc()); + wgt = MKLDNNMatrix::create(wgtGrad, wgtVal_->getPrimitiveDesc()); + bias = hasBias ? MKLDNNMatrix::create(biasGrad, biasVal_->getPrimitiveDesc()) + : nullptr; // create memory primitive desc fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, inVal_->getMemoryDesc(), - wgtGrad_->getMemoryDesc(), - outGrad_->getMemoryDesc()); + wgt->getMemoryDesc(), + out->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); fc_bwdWgt::desc bwdWgtDesc = hasBias ? fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgtGrad_->getMemoryDesc(), - biasGrad_->getMemoryDesc(), - outGrad_->getMemoryDesc()) + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) : fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgtGrad_->getMemoryDesc(), - outGrad_->getMemoryDesc()); + wgt->getMemoryDesc(), + out->getMemoryDesc()); fc_bwdWgt::primitive_desc bwdWgtPD = fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); if (hasBias) { - bwdWgt_.reset( - new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt, *bias)); } else { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt)); } - pipelineBwd_.clear(); - pipelineBwd_.push_back(*bwdWgt_); + pipeline.push_back(*bwdWgt_); /// backward data - const MatrixPtr& in = inputLayers_[0]->getOutput().grad; - if (in == nullptr) { + const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad; + if (inGrad == nullptr) { return; } if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done } else { - inGrad_ = MKLDNNMatrix::create(in, inVal_->getPrimitiveDesc()); + in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); } - fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(inVal_->getMemoryDesc(), - wgtGrad_->getMemoryDesc(), - outGrad_->getMemoryDesc()); + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc( + inVal_->getMemoryDesc(), wgt->getMemoryDesc(), out->getMemoryDesc()); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); CHECK(wgtVal_) << "Should have weight memory"; - bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); + bwdData_.reset(new fc_bwdData(bwdDataPD, *out, *wgtVal_, *in)); printGradFormatFlow(); - pipelineBwd_.push_back(*bwdData_); + pipeline.push_back(*bwdData_); } void MKLDNNFcLayer::updateInputData() { diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index add8ac999147487276115b904324d45f9b42066f..3119f863496df092da13c08bf733f13c42e53780 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -45,11 +45,20 @@ public: bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; - void reshape() override; - - void resetFwd() override; - - void resetBwd() override; + void reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + + void resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; + + void resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; void updateInputData() override; diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index c10f2fec2f3f711ee3bdab762c73e15742571f02..169679c8297542cac4a43f5a8e1af311ad9282df 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -111,13 +111,14 @@ public: { REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); - copySeqInfoToOutputs(); CHECK(!inputLayers_.empty()); + copySeqInfoToOutputs(); size_t elemenCnt = inputLayers_[0]->getOutput().value->getElementCnt(); if (inputElemenCnt_ != elemenCnt) { + // reset when input total sizes changed, not only the batchsize inputElemenCnt_ = elemenCnt; - reshape(); - resetFwd(); + reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); + resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_); convertWeightsFromPaddle(); needResetBwd_ = true; } @@ -144,7 +145,7 @@ public: { REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); if (needResetBwd_) { - resetBwd(); + resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_); needResetBwd_ = false; } @@ -160,20 +161,30 @@ public: /** * reshape the input image sizes * and reset output image and buffer size + * output channel can not be changed */ - virtual void reshape() = 0; + virtual void reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) = 0; /** * reset the mkldnn forward primitve and memory * only would be called when input size changes */ - virtual void resetFwd() = 0; + virtual void resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) = 0; /** * reset the mkldnn backward primitve and memory for mkldnn fc * only would be called when needed */ - virtual void resetBwd() = 0; + virtual void resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) = 0; /** * Update input value data when input layer is "data" type. @@ -207,16 +218,16 @@ protected: /** * reshape the input image sizes and input batchsize */ - virtual void reshapeInput() { + virtual void reshapeInput(int& batchsize, int& height, int& width) { const Argument& input = inputLayers_[0]->getOutput(); - bs_ = input.getBatchSize(); - int height = input.getFrameHeight(); - int width = input.getFrameWidth(); - if (height != 0) { - ih_ = height; + batchsize = input.getBatchSize(); + int h = input.getFrameHeight(); + int w = input.getFrameWidth(); + if (h != 0) { + height = h; } - if (width != 0) { - iw_ = width; + if (w != 0) { + width = w; } }