From f2317b67f7673eea465dbc0e41b4235d0927aa72 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Wed, 13 Sep 2017 16:44:37 +0800
Subject: [PATCH] separate resetFwd and resetBwd to some sub functions

---
 paddle/gserver/layers/MKLDNNConvLayer.cpp | 513 ++++++++++++++--------
 paddle/gserver/layers/MKLDNNConvLayer.h   | 108 ++++-
 2 files changed, 433 insertions(+), 188 deletions(-)

diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp
index 19891043a1..f8c06c5f86 100644
--- a/paddle/gserver/layers/MKLDNNConvLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp
@@ -18,9 +18,6 @@ limitations under the License. */
 
 using namespace mkldnn;  // NOLINT
 typedef memory::format format;
-typedef convolution_forward conv_fwd;
-typedef convolution_backward_weights conv_bwdWgt;
-typedef convolution_backward_data conv_bwdData;
 
 namespace paddle {
 
@@ -114,237 +111,396 @@ void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
                                MKLDNNMatrixPtr& wgt,
                                MKLDNNMatrixPtr& bias,
                                MKLDNNMatrixPtr& out) {
-  pipeline.clear();
-  bool hasBias = biases_ && biases_->getW();
-  biasVal_ = nullptr;
+  resetFwdPD(fwdPD_);
+
+  resetFwdBuffers(fwdPD_, in, wgt, bias, out);
+
+  resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
+
+  printValueFormatFlow();
+}
+
+void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
+                               MKLDNNMatrixPtr& in,
+                               MKLDNNMatrixPtr& wgt,
+                               MKLDNNMatrixPtr& bias,
+                               MKLDNNMatrixPtr& out) {
+  std::shared_ptr<conv_bwdWgt::primitive_desc> bwdWgtPD;
+  std::shared_ptr<conv_bwdData::primitive_desc> bwdDataPD;
+
+  resetBwdWgtPD(bwdWgtPD);
+
+  resetBwdDataPD(bwdDataPD);
+
+  resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out);
+  resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out);
+
+  printGradFormatFlow();
+}
+
+void MKLDNNConvLayer::updateInputData() {
+  cpuInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
+}
+
+void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) {
+  weight_->getParameterPtr()->incUpdate(callback);
+  if (biases_ && biases_->getWGrad()) {
+    biases_->getParameterPtr()->incUpdate(callback);
+  }
+}
+
+void MKLDNNConvLayer::loadConvSettings(memory::dims& wgt,
+                                       memory::dims& bias,
+                                       memory::dims& stride,
+                                       memory::dims& dilation,
+                                       memory::dims& padL,
+                                       memory::dims& padR) {
+  wgt = (gp_ == 1) ? memory::dims{oc_, ic_, fh_, fw_}
+                   : memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_};
+  bias = memory::dims{oc_};
+  stride = memory::dims{sh_, sw_};
+  padL = memory::dims{ph_, pw_};
+  padR = getPaddingR();
+  // note: mkldnn dilation starts from 0
+  dilation = memory::dims{dh_ - 1, dw_ - 1};
+}
+
+void MKLDNNConvLayer::resetFwdPD(
+    std::shared_ptr<conv_fwd::primitive_desc>& pd) {
   // dims for conv
   memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_};
   memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
-  memory::dims wgtDims =
-      (gp_ == 1) ? memory::dims{oc_, ic_, fh_, fw_}
-                 : memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_};
-  memory::dims biasDims = memory::dims{oc_};
-  memory::dims strides = {sh_, sw_};
-  // note: mkldnn dilation start from 0
-  memory::dims dilations = {dh_ - 1, dw_ - 1};
-  memory::dims padding = {ph_, pw_};
-  memory::dims padR = getPaddingR();
+  memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
+  loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
 
-  // create forward handle
-  prop_kind pk =
-      passType_ == PASS_TEST ? prop_kind::forward : prop_kind::forward_training;
+  prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring
+                                        : prop_kind::forward_training;
   algorithm algo = algorithm::convolution_direct;
   padding_kind padKind = padding_kind::zero;
   conv_fwd::desc fwdDesc =
-      hasBias ? conv_fwd::desc(pk,
-                               algo,
-                               MKLDNNMatrix::createMemoryDesc(inDims),
-                               MKLDNNMatrix::createMemoryDesc(wgtDims),
-                               MKLDNNMatrix::createMemoryDesc(biasDims),
-                               MKLDNNMatrix::createMemoryDesc(outDims),
-                               strides,
-                               dilations,
-                               padding,
-                               padR,
-                               padKind)
-              : conv_fwd::desc(pk,
-                               algo,
-                               MKLDNNMatrix::createMemoryDesc(inDims),
-                               MKLDNNMatrix::createMemoryDesc(wgtDims),
-                               MKLDNNMatrix::createMemoryDesc(outDims),
-                               strides,
-                               dilations,
-                               padding,
-                               padR,
-                               padKind);
-  fwdPD_.reset(new conv_fwd::primitive_desc(fwdDesc, engine_));
-
-  // create mkldnn matrix
-  const MatrixPtr& wgtVal = weight_->getW();
-  const MatrixPtr& inVal = inputLayers_[0]->getOutput().value;
-  const MatrixPtr& outVal = output_.value;
-  wgt = MKLDNNMatrix::create(wgtVal, fwdPD_->weights_primitive_desc());
-  in = MKLDNNMatrix::create(inVal, fwdPD_->src_primitive_desc());
-  out = MKLDNNMatrix::create(outVal, fwdPD_->dst_primitive_desc());
-  VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat();
-  if (hasBias) {
-    const MatrixPtr& biasVal = biases_->getW();
-    bias = MKLDNNMatrix::create(biasVal, biasDims, format::x, engine_);
-    CHECK(bias->getPrimitiveDesc() == fwdPD_->bias_primitive_desc())
-        << "bias primitive desc should always be equal";
+      biases_ && biases_->getW()
+          ? conv_fwd::desc(pk,
+                           algo,
+                           MKLDNNMatrix::createMemoryDesc(inDims),
+                           MKLDNNMatrix::createMemoryDesc(wgtDims),
+                           MKLDNNMatrix::createMemoryDesc(biasDims),
+                           MKLDNNMatrix::createMemoryDesc(outDims),
+                           strides,
+                           dilations,
+                           padL,
+                           padR,
+                           padKind)
+          : conv_fwd::desc(pk,
+                           algo,
+                           MKLDNNMatrix::createMemoryDesc(inDims),
+                           MKLDNNMatrix::createMemoryDesc(wgtDims),
+                           MKLDNNMatrix::createMemoryDesc(outDims),
+                           strides,
+                           dilations,
+                           padL,
+                           padR,
+                           padKind);
+  pd.reset(new conv_fwd::primitive_desc(fwdDesc, engine_));
+}
+
+void MKLDNNConvLayer::resetFwdBuffers(
+    std::shared_ptr<conv_fwd::primitive_desc>& pd,
+    MKLDNNMatrixPtr& in,
+    MKLDNNMatrixPtr& wgt,
+    MKLDNNMatrixPtr& bias,
+    MKLDNNMatrixPtr& out) {
+  CHECK(pd);
+  resetInValue(pd, in);
+
+  resetWgtBiasValue(pd, wgt, bias);
+
+  resetOutValue(pd, out);
+}
+
+void MKLDNNConvLayer::resetFwdPipeline(
+    std::vector<primitive>& pipeline,
+    std::shared_ptr<conv_fwd::primitive_desc>& pd,
+    MKLDNNMatrixPtr& in,
+    MKLDNNMatrixPtr& wgt,
+    MKLDNNMatrixPtr& bias,
+    MKLDNNMatrixPtr& out) {
+  pipeline.clear();
+
+  if (cvtInVal_) {
+    pipeline.push_back(*cvtInVal_);
+  }
+
+  if (bias) {
+    fwd_.reset(new conv_fwd(*pd, *in, *wgt, *bias, *out));
+  } else {
+    fwd_.reset(new conv_fwd(*pd, *in, *wgt, *out));
   }
+  pipeline.push_back(*fwd_);
+
+  if (cvtOutVal_) {
+    pipeline.push_back(*cvtOutVal_);
+  }
+}
 
-  // add reorder if input value do not match
+void MKLDNNConvLayer::resetInValue(
+    std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) {
+  const MatrixPtr& inMat = inputLayers_[0]->getOutput().value;
+  in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc());
+
+  // create buffer and reorder if input value does not match
+  cpuInVal_ = nullptr;
+  cvtInVal_ = nullptr;
   if (inputIsOnlyMKLDNN()) {
-    MKLDNNMatrixPtr dnnIn = std::dynamic_pointer_cast<MKLDNNMatrix>(inVal);
+    MKLDNNMatrixPtr dnnIn = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
     CHECK(dnnIn) << "Input should be MKLDNNMatrix";
     if (dnnIn->getPrimitiveDesc() != in->getPrimitiveDesc()) {
       CHECK_EQ(dnnIn->getFormat(), format::nc);
-      CHECK(ih_ == 1 && iw_ == 1);
-      dnnIn = MKLDNNMatrix::create(inVal, inDims, format::nchw, engine_);
+      CHECK(ih_ == 1 && iw_ == 1) << "when input is nc format";
nc format"; + // create a new one with nchw format and same data + memory::dims inDims = memory::dims{bs_, ic_, 1, 1}; + dnnIn = MKLDNNMatrix::create(inMat, inDims, format::nchw, engine_); CHECK(dnnIn->getPrimitiveDesc() == in->getPrimitiveDesc()); } in = dnnIn; } else { const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE); + memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_}; cpuInVal_ = MKLDNNMatrix::create(cpuIn, inDims, format::nchw, engine_); if (cpuInVal_->getPrimitiveDesc() != in->getPrimitiveDesc()) { // create new mkldnn matrix - in = MKLDNNMatrix::create(nullptr, fwdPD_->src_primitive_desc()); + in = MKLDNNMatrix::create(nullptr, pd->src_primitive_desc()); cvtInVal_ = MKLDNNMatrix::createReorder(cpuInVal_, in); - CHECK(cvtInVal_); - pipeline.push_back(*cvtInVal_); + CHECK(cvtInVal_) << "should not be emptry"; } else { in = cpuInVal_; } } +} - // add fwd handle - if (hasBias) { - fwd_.reset(new conv_fwd(*fwdPD_, *in, *wgt, *bias, *out)); - } else { - fwd_.reset(new conv_fwd(*fwdPD_, *in, *wgt, *out)); +void MKLDNNConvLayer::resetWgtBiasValue( + std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { + wgt = MKLDNNMatrix::create(weight_->getW(), pd->weights_primitive_desc()); + VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat(); + + bias = nullptr; + if (biases_ && biases_->getW()) { + bias = MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc()); } - pipeline.push_back(*fwd_); +} + +void MKLDNNConvLayer::resetOutValue( + std::shared_ptr& pd, MKLDNNMatrixPtr& out) { + out = MKLDNNMatrix::create(output_.value, pd->dst_primitive_desc()); // change original output value from cpu matrix to mkldnn matrix output_.value = std::dynamic_pointer_cast(out); - // add reorder if output value has cpu device and pd do not match + + // create reorder if output value has cpu device and pd do not match + cpuOutVal_ = nullptr; + cpuOutVal_ = nullptr; if (!outputIsOnlyMKLDNN()) { const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; + memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); - CHECK(cvtOutVal_); - pipeline.push_back(*cvtOutVal_); + CHECK(cvtOutVal_) << "should not be emptry"; } else { - // share data + // CPU output share the same data of MKLDNN output cpuOut->setData(out->getData()); cpuOutVal_ = out; } } - - printValueFormatFlow(); } -void MKLDNNConvLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - pipeline.clear(); - bool hasBias = biases_ && biases_->getWGrad(); +void MKLDNNConvLayer::resetBwdWgtPD( + std::shared_ptr& pd) { + memory::dims wgtDims, biasDims, strides, dilations, padL, padR; + loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); - /// backward weight + // create backward weight using input, output and weight value memory desc CHECK(inVal_) << "Should have input value"; CHECK(outVal_) << "Should have output value"; CHECK(wgtVal_) << "Should have weight value"; - memory::dims wgtDims = - (gp_ == 1) ? 
-      (gp_ == 1) ? memory::dims{oc_, ic_, fh_, fw_}
-                 : memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_};
-  memory::dims strides = {sh_, sw_};
-  memory::dims dilations = {dh_ - 1, dw_ - 1};
-  memory::dims padding = {ph_, pw_};
-  memory::dims padR = getPaddingR();
-
-  // create backward handle
   algorithm algo = algorithm::convolution_direct;
   padding_kind padKind = padding_kind::zero;
-  auto bwdWgtDesc =
-      hasBias ? conv_bwdWgt::desc(algo,
-                                  inVal_->getMemoryDesc(),
-                                  MKLDNNMatrix::createMemoryDesc(wgtDims),
-                                  biasVal_->getMemoryDesc(),
-                                  outVal_->getMemoryDesc(),
-                                  strides,
-                                  padding,
-                                  padR,
-                                  padKind)
-              : conv_bwdWgt::desc(algo,
-                                  inVal_->getMemoryDesc(),
-                                  MKLDNNMatrix::createMemoryDesc(wgtDims),
-                                  outVal_->getMemoryDesc(),
-                                  strides,
-                                  padding,
-                                  padR,
-                                  padKind);
-
-  auto bwdWgtPD = conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_);
-  CHECK(bwdWgtPD.src_primitive_desc() == inVal_->getPrimitiveDesc())
+  auto bwdWgtDesc = biasVal_ != nullptr
+                        ? conv_bwdWgt::desc(algo,
+                                            inVal_->getMemoryDesc(),
+                                            wgtVal_->getMemoryDesc(),
+                                            biasVal_->getMemoryDesc(),
+                                            outVal_->getMemoryDesc(),
+                                            strides,
+                                            padL,
+                                            padR,
+                                            padKind)
+                        : conv_bwdWgt::desc(algo,
+                                            inVal_->getMemoryDesc(),
+                                            wgtVal_->getMemoryDesc(),
+                                            outVal_->getMemoryDesc(),
+                                            strides,
+                                            padL,
+                                            padR,
+                                            padKind);
+  pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_));
+  CHECK(pd->src_primitive_desc() == inVal_->getPrimitiveDesc())
       << "primitive desc of in value should equal";
-  CHECK(bwdWgtPD.diff_dst_primitive_desc() == outVal_->getPrimitiveDesc())
+  CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc())
       << "primitive desc of out grad should equal the out value";
-  CHECK(bwdWgtPD.diff_weights_primitive_desc() == wgtVal_->getPrimitiveDesc())
+  CHECK(pd->diff_weights_primitive_desc() == wgtVal_->getPrimitiveDesc())
       << "primitive desc of weight grad should equal the weight value";
+}
 
-  // create mkldnn matrix
-  const MatrixPtr& wgtGrad = weight_->getWGrad();
-  const MatrixPtr& outGrad = output_.grad;
-  wgt = MKLDNNMatrix::create(wgtGrad, bwdWgtPD.diff_weights_primitive_desc());
-  out = MKLDNNMatrix::create(outGrad, bwdWgtPD.diff_dst_primitive_desc());
-  CHECK(wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc())
-      << "primitive desc of weight grad and value should be equal";
-  CHECK(out->getPrimitiveDesc() == outVal_->getPrimitiveDesc())
-      << "primitive desc of out grad and value should be equal";
-  VLOG(MKLDNN_FMTS) << "Backward weight, weight grad format: "
-                    << wgt->getFormat();
-  if (hasBias) {
-    const MatrixPtr& biasGrad = biases_->getWGrad();
-    bias = MKLDNNMatrix::create(biasGrad, bwdWgtPD.diff_bias_primitive_desc());
-    CHECK(bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc())
-        << "primitive desc of bias grad should equal the bias value";
+void MKLDNNConvLayer::resetBwdDataPD(
+    std::shared_ptr<conv_bwdData::primitive_desc>& pd) {
+  if (inputLayers_[0]->getOutput().grad == nullptr) {
+    return;
   }
+  memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
+  loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
+  CHECK(inVal_) << "Should have input value";
+  CHECK(outVal_) << "Should have output value";
+  // create backward data using input and output value memory desc
+  // but using weight memory desc with any format
+  auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct,
+                                        inVal_->getMemoryDesc(),
+                                        MKLDNNMatrix::createMemoryDesc(wgtDims),
+                                        outVal_->getMemoryDesc(),
+                                        strides,
+                                        padL,
+                                        padR,
+                                        padding_kind::zero);
+  pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_));
+  CHECK(pd->diff_src_primitive_desc() == inVal_->getPrimitiveDesc())
+      << "primitive desc of in grad should equal the in value";
+  CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc())
+      << "primitive desc of out grad should equal";
+}
+
+void MKLDNNConvLayer::resetBwdBuffers(
+    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
+    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
+    MKLDNNMatrixPtr& in,
+    MKLDNNMatrixPtr& wgt,
+    MKLDNNMatrixPtr& bias,
+    MKLDNNMatrixPtr& out) {
+  CHECK(wgtPD);
+  resetOutGrad(wgtPD, out);
+
+  resetWgtBiasGrad(wgtPD, wgt, bias);
+
+  resetInGrad(dataPD, in);
+
+  resetWgtValBwdData(dataPD, wgtValBwdData_);
+}
+
+void MKLDNNConvLayer::resetBwdPipeline(
+    std::vector<primitive>& pipeline,
+    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
+    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
+    MKLDNNMatrixPtr& in,
+    MKLDNNMatrixPtr& wgt,
+    MKLDNNMatrixPtr& bias,
+    MKLDNNMatrixPtr& out) {
+  pipeline.clear();
+
+  if (cvtOutGrad_) {
+    pipeline.push_back(*cvtOutGrad_);
+  }
+
+  // add bwdWgt handle
+  if (bias) {
+    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias));
+  } else {
+    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt));
+  }
+  pipeline.push_back(*bwdWgt_);
+
+  if (dataPD == nullptr) {
+    return;
+  }
+
+  if (cvtWgtVal_) {
+    pipeline.push_back(*cvtWgtVal_);
+  }
+
+  // add bwdData handle
+  CHECK(wgtValBwdData_) << "Should have weight memory";
+  bwdData_.reset(new conv_bwdData(*dataPD, *out, *wgtValBwdData_, *in));
+  pipeline.push_back(*bwdData_);
+
+  if (cvtInGrad_) {
+    pipeline.push_back(*cvtInGrad_);
+  }
+}
+
+void MKLDNNConvLayer::resetOutGrad(
+    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) {
+  const MatrixPtr& outMat = output_.grad;
+  out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc());
+  CHECK(outVal_ != nullptr &&
+        out->getPrimitiveDesc() == outVal_->getPrimitiveDesc())
+      << "primitive desc of out grad and value should be equal";
+
   // TODO(TJ): merge outgrad
-  // add reorder if has user output grad
+  // create reorder if the CPU output grad does not match
+  cpuOutGrad_ = nullptr;
+  cvtOutGrad_ = nullptr;
   if (!outputIsOnlyMKLDNN()) {
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
-    memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
     // same PrimitiveDesc with cpuInVal_
     CHECK(cpuOutVal_);
     cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
     if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) {
-      outGrad->setData(cpuOut->getData());
+      outMat->setData(cpuOut->getData());
       out = cpuOutGrad_;
     } else {
       cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
       CHECK(cvtOutGrad_);
-      pipeline.push_back(*cvtOutGrad_);
     }
   }
+}
 
-  // add bwdWgt handle
-  if (hasBias) {
-    bwdWgt_.reset(new conv_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt, *bias));
-  } else {
-    bwdWgt_.reset(new conv_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt));
-  }
-  pipeline.push_back(*bwdWgt_);
+void MKLDNNConvLayer::resetWgtBiasGrad(
+    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
+    MKLDNNMatrixPtr& wgt,
+    MKLDNNMatrixPtr& bias) {
+  wgt = MKLDNNMatrix::create(weight_->getWGrad(),
+                             wgtPD->diff_weights_primitive_desc());
+  CHECK(nullptr != wgtVal_ &&
+        wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc())
+      << "primitive desc of weight grad and value should be equal";
+  VLOG(MKLDNN_FMTS) << "weight grad format: " << wgt->getFormat();
 
-  /// backward data
-  const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
-  if (inGrad == nullptr) {
+  if (biasVal_ == nullptr) {
     return;
   }
+  bias = MKLDNNMatrix::create(biases_->getWGrad(),
+                              wgtPD->diff_bias_primitive_desc());
+  CHECK(bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc())
+      << "primitive desc of bias grad should equal the bias value";
+}
 
-  auto bwdDataDesc = conv_bwdData::desc(algo,
-                                        inVal_->getMemoryDesc(),
-                                        MKLDNNMatrix::createMemoryDesc(wgtDims),
-                                        out->getMemoryDesc(),
-                                        strides,
-                                        padding,
-                                        padR,
-                                        padKind);
-  auto bwdDataPD = conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_);
-  CHECK(bwdDataPD.diff_src_primitive_desc() == inVal_->getPrimitiveDesc())
-      << "primitive desc of in grad should equal the in value";
-  CHECK(bwdDataPD.diff_dst_primitive_desc() == out->getPrimitiveDesc())
-      << "primitive desc of out grad should equal";
+void MKLDNNConvLayer::resetInGrad(
+    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
+    MKLDNNMatrixPtr& in) {
+  if (dataPD == nullptr) {
+    return;
+  }
 
-  // create mkldnn matrix inGrad_ and reorder if necessary
   // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
-  in = MKLDNNMatrix::create(inGrad, bwdDataPD.diff_src_primitive_desc());
+  in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad,
+                            dataPD->diff_src_primitive_desc());
+  CHECK(nullptr != inVal_ &&
+        in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
+      << "primitive desc of input grad and value should be equal";
+
+  // create reorder if the CPU input grad does not match
+  cpuInGrad_ = nullptr;
   cvtInGrad_ = nullptr;
   if (!inputIsOnlyMKLDNN()) {
     const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE);
@@ -360,43 +516,28 @@ void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
       in = cpuInGrad_;
     }
   }
+}
 
-  // create new weight value for backward data, and reorder if necessary
+void MKLDNNConvLayer::resetWgtValBwdData(
+    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
+    MKLDNNMatrixPtr& wgt) {
+  if (dataPD == nullptr) {
+    return;
+  }
+
+  // create new weight value for backward data, and create reorder if necessary
   // since the primitive_desc would be different with wgtVal_
-  if (bwdDataPD.weights_primitive_desc() != wgtVal_->getPrimitiveDesc()) {
+  CHECK(wgtVal_) << "should have weight value";
+  if (dataPD->weights_primitive_desc() != wgtVal_->getPrimitiveDesc()) {
     wgtValBwdData_ =
-        MKLDNNMatrix::create(nullptr, bwdDataPD.weights_primitive_desc());
+        MKLDNNMatrix::create(nullptr, dataPD->weights_primitive_desc());
     cvtWgtVal_ = MKLDNNMatrix::createReorder(wgtVal_, wgtValBwdData_);
     CHECK(cvtWgtVal_);
-    pipeline.push_back(*cvtWgtVal_);
   } else {
     wgtValBwdData_ = wgtVal_;
   }
-  VLOG(MKLDNN_FMTS) << "Backward data, weight value format: "
+  VLOG(MKLDNN_FMTS) << "weight value format for backward data: "
                     << wgtValBwdData_->getFormat();
-
-  // add bwdData handle
-  CHECK(wgtValBwdData_) << "Should have weight memory";
-  bwdData_.reset(new conv_bwdData(bwdDataPD, *out, *wgtValBwdData_, *in));
-  pipeline.push_back(*bwdData_);
-
-  // add ingrad reorder after bwdData
-  if (cvtInGrad_) {
-    pipeline.push_back(*cvtInGrad_);
-  }
-
-  printGradFormatFlow();
-}
-
-void MKLDNNConvLayer::updateInputData() {
-  cpuInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
-}
-
-void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) {
-  weight_->getParameterPtr()->incUpdate(callback);
-  if (biases_ && biases_->getWGrad()) {
-    biases_->getParameterPtr()->incUpdate(callback);
-  }
 }
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/MKLDNNConvLayer.h b/paddle/gserver/layers/MKLDNNConvLayer.h
index d1a78ac1c0..f84f2f737c 100644
--- a/paddle/gserver/layers/MKLDNNConvLayer.h
+++ b/paddle/gserver/layers/MKLDNNConvLayer.h
@@ -18,6 +18,9 @@ limitations under the License. */
*/ #include "mkldnn.hpp" namespace paddle { +typedef mkldnn::convolution_forward conv_fwd; +typedef mkldnn::convolution_backward_weights conv_bwdWgt; +typedef mkldnn::convolution_backward_data conv_bwdData; /** * @brief A subclass of MKLDNNLayer conv layer. @@ -43,7 +46,7 @@ protected: std::shared_ptr cvtWgtVal_; // save forward primitive_desc, which can be used backward - std::shared_ptr fwdPD_; + std::shared_ptr fwdPD_; // MKLDNNMatrixPtr which should be created from CPU Device MKLDNNMatrixPtr cpuInVal_; @@ -99,7 +102,6 @@ public: void convertWeightsToPaddle() override; -protected: void printSizeInfo() override { MKLDNNLayer::printSizeInfo(); VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_ @@ -116,6 +118,7 @@ protected: VLOG(MKLDNN_FMTS) << " >>> " << cpuOutVal_->getFormat(); } } + void printGradFormatFlow() override { if (cpuInGrad_) { VLOG(MKLDNN_FMTS) << cpuInGrad_->getFormat() << " <<<"; @@ -126,6 +129,107 @@ protected: } } +protected: + /** + * load the dims settings of this conv + */ + void loadConvSettings(mkldnn::memory::dims& wgt, + mkldnn::memory::dims& bias, + mkldnn::memory::dims& stride, + mkldnn::memory::dims& dilation, + mkldnn::memory::dims& padL, + mkldnn::memory::dims& padR); + + /** + * reset the forward primitive descriptor. + */ + void resetFwdPD(std::shared_ptr& pd); + /** + * reset the MKLDNNMatrix buffers used in forward. + */ + void resetFwdBuffers(std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + /** + * reset the forward pipeline. + */ + void resetFwdPipeline(std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + + /** + * reset MKLDNNMatrix of input value + */ + void resetInValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& in); + /** + * reset MKLDNNMatrix of weight and bias value + */ + void resetWgtBiasValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias); + /** + * reset MKLDNNMatrix of output value + */ + void resetOutValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& out); + + /** + * reset the backward weight primitive descriptor. + */ + void resetBwdWgtPD(std::shared_ptr& pd); + /** + * reset the backward data primitive descriptor. + */ + void resetBwdDataPD(std::shared_ptr& pd); + /** + * reset the MKLDNNMatrix buffers used in backward. + */ + void resetBwdBuffers(std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + /** + * reset the backward pipeline. + */ + void resetBwdPipeline(std::vector& pipeline, + std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + + /** + * reset MKLDNNMatrix of output grad + */ + void resetOutGrad(std::shared_ptr& wgtPD, + MKLDNNMatrixPtr& out); + /** + * reset MKLDNNMatrix of weight and bias grad + */ + void resetWgtBiasGrad(std::shared_ptr& wgtPD, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias); + /** + * reset MKLDNNMatrix of input grad + */ + void resetInGrad(std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in); + /** + * reset MKLDNNMatrix of weight value for backward data + * since the primitive_desc would be different with wgtVal_ + */ + void resetWgtValBwdData(std::shared_ptr& dataPD, + MKLDNNMatrixPtr& wgt); + /** * get padding_r according to * https://github.com/01org/mkl-dnn/blob/master/tests/gtests/ -- GitLab