From b0d9b68a5ffe08615414d412f464bc64c9f18497 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 14 Sep 2017 20:34:21 +0800 Subject: [PATCH] unify functions of mkldnn_fc and refine comments --- paddle/gserver/layers/MKLDNNConvLayer.cpp | 9 +- paddle/gserver/layers/MKLDNNFcLayer.cpp | 277 ++++++++++++++-------- paddle/gserver/layers/MKLDNNFcLayer.h | 59 +++++ paddle/math/MKLDNNMatrix.h | 11 +- 4 files changed, 251 insertions(+), 105 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp index f8c06c5f868..9088744beeb 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -285,10 +285,9 @@ void MKLDNNConvLayer::resetWgtBiasValue( wgt = MKLDNNMatrix::create(weight_->getW(), pd->weights_primitive_desc()); VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat(); - bias = nullptr; - if (biases_ && biases_->getW()) { - bias = MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc()); - } + bias = (biases_ && biases_->getW()) + ? MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc()) + : nullptr; } void MKLDNNConvLayer::resetOutValue( @@ -356,6 +355,7 @@ void MKLDNNConvLayer::resetBwdWgtPD( void MKLDNNConvLayer::resetBwdDataPD( std::shared_ptr& pd) { + pd = nullptr; if (inputLayers_[0]->getOutput().grad == nullptr) { return; } @@ -476,6 +476,7 @@ void MKLDNNConvLayer::resetWgtBiasGrad( << "primitive desc of weight grad and value should be equal"; VLOG(MKLDNN_FMTS) << "weight grad format: " << wgt->getFormat(); + bias = nullptr; if (biasVal_ == nullptr) { return; } diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index f70343251ad..f60e221a6ec 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -17,9 +17,6 @@ limitations under the License. */ using namespace mkldnn; // NOLINT typedef memory::format format; -typedef inner_product_forward fc_fwd; -typedef inner_product_backward_weights fc_bwdWgt; -typedef inner_product_backward_data fc_bwdData; namespace paddle { @@ -93,35 +90,88 @@ void MKLDNNFcLayer::reshape( printSizeInfo(); } -void MKLDNNFcLayer::resetFwd(std::vector& pipeline, +void MKLDNNFcLayer::resetFwd(std::vector& pipeline, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - pipeline.clear(); - bool hasBias = biases_ && biases_->getW(); - const MatrixPtr& wgtVal = weight_->getW(); - const MatrixPtr& biasVal = hasBias ? biases_->getW() : nullptr; - const MatrixPtr& outVal = output_.value; + resetFwdBuffers(in, wgt, bias, out); + + resetFwdPD(fwdPD_, in, wgt, bias, out); + + resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + + printValueFormatFlow(); +} + +void MKLDNNFcLayer::resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + std::shared_ptr bwdWgtPD; + std::shared_ptr bwdDataPD; + + resetBwdBuffers(in, wgt, bias, out); + + resetBwdWgtPD(bwdWgtPD, wgt, bias, out); + + resetBwdDataPD(bwdDataPD, in, out); + + resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + + printGradFormatFlow(); +} + +void MKLDNNFcLayer::updateInputData() { + inVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); +} +void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); + } +} + +void MKLDNNFcLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + resetInValue(in); + + resetWgtBiasValue(wgt, bias); + + resetOutValue(out); +} + +void MKLDNNFcLayer::resetInValue(MKLDNNMatrixPtr& in) { if (inputIsOnlyMKLDNN()) { - const MatrixPtr& inVal = getInputValue(0); - in = std::dynamic_pointer_cast(inVal); + const MatrixPtr& dnnIn = getInputValue(0); + in = std::dynamic_pointer_cast(dnnIn); CHECK(in) << "Input should be MKLDNNMatrix"; } else { CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet"; - const MatrixPtr& inVal = getInputValue(0, CPU_DEVICE); + const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE); in = MKLDNNMatrix::create( - inVal, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_); + cpuIn, {bs_, ic_, ih_, iw_}, format::nchw, engine_); } in->downSpatial(); +} + +void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { wgt = MKLDNNMatrix::create( - wgtVal, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_); + weight_->getW(), {oc_, ic_, ih_, iw_}, format::oihw, engine_); wgt->downSpatial(); - bias = hasBias ? MKLDNNMatrix::create(biasVal, {oc_}, format::x, engine_) - : nullptr; - out = MKLDNNMatrix::create(outVal, {bs_, oc_}, format::nc, engine_); + bias = (biases_ && biases_->getW()) + ? MKLDNNMatrix::create(biases_->getW(), {oc_}, format::x, engine_) + : nullptr; +} + +void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) { + out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_); // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(out); if (!outputIsOnlyMKLDNN()) { @@ -129,46 +179,59 @@ void MKLDNNFcLayer::resetFwd(std::vector& pipeline, // just share point getOutput(CPU_DEVICE).value->setData(output_.value->getData()); } +} - // create forward handle +void MKLDNNFcLayer::resetFwdPD(std::shared_ptr& pd, + MKLDNNMatrixPtr in, + MKLDNNMatrixPtr wgt, + MKLDNNMatrixPtr bias, + MKLDNNMatrixPtr out) { + CHECK(in); + CHECK(wgt); + CHECK(out); prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk, - in->getMemoryDesc(), - wgt->getMemoryDesc(), - bias->getMemoryDesc(), - out->getMemoryDesc()) - : fc_fwd::desc(pk, - in->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); - fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - if (hasBias) { - fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *bias, *out)); + fc_fwd::desc fwdDesc = bias != nullptr ? fc_fwd::desc(pk, + in->getMemoryDesc(), + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) + : fc_fwd::desc(pk, + in->getMemoryDesc(), + wgt->getMemoryDesc(), + out->getMemoryDesc()); + pd.reset(new fc_fwd::primitive_desc(fwdDesc, engine_)); +} + +void MKLDNNFcLayer::resetFwdPipeline( + std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + + if (bias) { + fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out)); } else { - fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *out)); + fwd_.reset(new fc_fwd(*pd, *in, *wgt, *out)); } - printValueFormatFlow(); pipeline.push_back(*fwd_); } -void MKLDNNFcLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - pipeline.clear(); - if (!needResetBwd_) { - return; - } - needResetBwd_ = false; - bool hasBias = biases_ && biases_->getWGrad(); +void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + resetOutGrad(out); + + resetWgtBiasGrad(wgt, bias); - /// backward weight - CHECK(inVal_) << "Should have input value"; - const MatrixPtr& wgtGrad = weight_->getWGrad(); - const MatrixPtr& biasGrad = hasBias ? biases_->getWGrad() : nullptr; + resetInGrad(in); +} +void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) { // TODO(TJ): merge outgrad int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; // for MKLDNN device: @@ -178,66 +241,88 @@ void MKLDNNFcLayer::resetBwd(std::vector& pipeline, // for CPU device: // fc do not need to convert from cpu device since output is always nc format // only need create from cpu device - const MatrixPtr& outGrad = getOutput(device).grad; - out = MKLDNNMatrix::create(outGrad, outVal_->getPrimitiveDesc()); - wgt = MKLDNNMatrix::create(wgtGrad, wgtVal_->getPrimitiveDesc()); - bias = hasBias ? MKLDNNMatrix::create(biasGrad, biasVal_->getPrimitiveDesc()) - : nullptr; - - // create memory primitive desc - fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, - inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); - fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = hasBias - ? fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - bias->getMemoryDesc(), - out->getMemoryDesc()) - : fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); - fc_bwdWgt::primitive_desc bwdWgtPD = - fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); - - if (hasBias) { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt, *bias)); - } else { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt)); + CHECK(outVal_); + out = + MKLDNNMatrix::create(getOutput(device).grad, outVal_->getPrimitiveDesc()); +} + +void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { + CHECK(wgtVal_); + wgt = MKLDNNMatrix::create(weight_->getWGrad(), wgtVal_->getPrimitiveDesc()); + + bias = nullptr; + if (biasVal_ == nullptr) { + return; } - pipeline.push_back(*bwdWgt_); + bias = + MKLDNNMatrix::create(biases_->getWGrad(), biasVal_->getPrimitiveDesc()); +} - /// backward data +void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) { + in = nullptr; const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad; if (inGrad == nullptr) { return; } - if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { - // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done - } else { - in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); - } - - fc_bwdData::desc bwdDataDesc = fc_bwdData::desc( - inVal_->getMemoryDesc(), wgt->getMemoryDesc(), out->getMemoryDesc()); - fc_bwdData::primitive_desc bwdDataPD = - fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); + // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done + CHECK(inVal_); + in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); +} - CHECK(wgtVal_) << "Should have weight memory"; - bwdData_.reset(new fc_bwdData(bwdDataPD, *out, *wgtVal_, *in)); - printGradFormatFlow(); - pipeline.push_back(*bwdData_); +void MKLDNNFcLayer::resetBwdWgtPD( + std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + CHECK(inVal_); + fc_bwdWgt::desc bwdWgtDesc = bias ? fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) + : fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgt->getMemoryDesc(), + out->getMemoryDesc()); + pd.reset(new fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); } -void MKLDNNFcLayer::updateInputData() { - inVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); +void MKLDNNFcLayer::resetBwdDataPD( + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& out) { + pd = nullptr; + if (in == nullptr) { + return; + } + CHECK(wgtVal_); + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc( + in->getMemoryDesc(), wgtVal_->getMemoryDesc(), out->getMemoryDesc()); + pd.reset(new fc_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_)); } -void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { - weight_->getParameterPtr()->incUpdate(callback); - if (biases_ && biases_->getWGrad()) { - biases_->getParameterPtr()->incUpdate(callback); +void MKLDNNFcLayer::resetBwdPipeline( + std::vector& pipeline, + std::shared_ptr& bwdWgtPD, + std::shared_ptr& bwdDataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + CHECK(inVal_); + if (bias) { + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias)); + } else { + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt)); + } + pipeline.push_back(*bwdWgt_); + + if (bwdDataPD == nullptr) { + return; } + CHECK(wgtVal_) << "Should have weight memory"; + bwdData_.reset(new fc_bwdData(*bwdDataPD, *out, *wgtVal_, *in)); + pipeline.push_back(*bwdData_); } + } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index 3119f863496..c76878aafab 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -18,6 +18,9 @@ limitations under the License. */ #include "mkldnn.hpp" namespace paddle { +typedef mkldnn::inner_product_forward fc_fwd; +typedef mkldnn::inner_product_backward_weights fc_bwdWgt; +typedef mkldnn::inner_product_backward_data fc_bwdData; /** * @brief A subclass of MKLDNNLayer fc layer. @@ -32,6 +35,9 @@ protected: // if has already init the weight bool hasInitedWgt_; + // save forward primitive_desc, which can be used backward + std::shared_ptr fwdPD_; + // fc weight and bias std::unique_ptr weight_; std::unique_ptr biases_; @@ -67,6 +73,59 @@ public: void convertWeightsFromPaddle() override; void convertWeightsToPaddle() override; + +protected: + /** + * Forward functions: reset buffers(input, output, weight and bias), + * reset primitive descriptor, + * reset pipeline. + */ + void resetFwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + void resetInValue(MKLDNNMatrixPtr& in); + void resetWgtBiasValue(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias); + void resetOutValue(MKLDNNMatrixPtr& out); + void resetFwdPD(std::shared_ptr& pd, + MKLDNNMatrixPtr in, + MKLDNNMatrixPtr wgt, + MKLDNNMatrixPtr bias, + MKLDNNMatrixPtr out); + void resetFwdPipeline(std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + + /** + * Backward functions: reset buffers(input, output, weight and bias), + * reset primitive descriptor for backward weight, + * reset primitive descriptor for backward data, + * reset pipeline. + */ + void resetBwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + void resetOutGrad(MKLDNNMatrixPtr& out); + void resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias); + void resetInGrad(MKLDNNMatrixPtr& in); + void resetBwdWgtPD(std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + void resetBwdDataPD(std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& out); + void resetBwdPipeline(std::vector& pipeline, + std::shared_ptr& bwdWgtPD, + std::shared_ptr& bwdDataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); }; } // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 0aa130b4a0d..c843115eb9a 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -66,11 +66,12 @@ public: /** * Create reorder primitive. * Create a mkldnn::reorder handle for converting src MKLDNNMatrix to dst. - * checkData: for whether to check the data handle of src and dst is the same. - * if true, means check it and do not want support inplace reorder; - * otherwise do not check data which means the created reorder - * maybe inplace buffer and do not guarantee the logical is correct - * since not all format or conversion support inplace. + * checkData: whether to check the data handle of src and dst. + * if true, it will check the data and do not allow them equal; + * otherwise, it will not check them, then the reorder created + * may have inplace buffer. + * Do not set false, if you can not guarantee the inplace logical + * would work with your reorder. */ static std::shared_ptr createReorder( const MKLDNNMatrixPtr& src, -- GitLab