From bfbd066fdd1c4a81266864bf837d89742b3f2ad6 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 29 Aug 2017 19:55:44 +0800 Subject: [PATCH] refine --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 117 ++++++++++++------------ paddle/gserver/layers/MKLDNNFcLayer.h | 2 + paddle/gserver/layers/MKLDNNLayer.h | 48 +++++++--- paddle/math/MKLDNNMatrix.cpp | 25 ++--- paddle/math/MKLDNNMatrix.h | 29 +++--- 5 files changed, 118 insertions(+), 103 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index d38e6a2099..a08cca318e 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -77,6 +77,24 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); } +void MKLDNNFcLayer::convertOutputToOtherDevice() { + copyOutputInfoToOtherDevice(); + // find other cpu device and reorder output to cpu device + int cnt = 0; + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { + // fc cpu output value do not need convert + // just share point + outputOtherDevice_[i].value = output_.value; + ++cnt; + } + } + + if (cnt > 1) { + LOG(WARNING) << "should not have more than one CPU devie"; + } +} + void MKLDNNFcLayer::reshape() { const Argument& input = getInput(0, getPrev(0)->getDeviceId()); int batchSize = input.getBatchSize(); @@ -116,7 +134,7 @@ void MKLDNNFcLayer::resetFwd() { const MatrixPtr& bias = hasBias ? 
biases_->getW() : nullptr; const MatrixPtr& out = output_.value; - if (prevIsMKLDNN()) { + if (prevIsOnlyMKLDNN()) { const MatrixPtr& in = getInputValue(0); inVal_ = std::dynamic_pointer_cast(in); CHECK(inVal_) << "Input should be MKLDNNMatrix"; @@ -136,30 +154,21 @@ void MKLDNNFcLayer::resetFwd() { // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(outVal_); - if (!nextIsMKLDNN()) { - Argument cpuOutput; - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { - cpuOutput = outputOtherDevice_[i]; - } - } - cpuOutput.setFrameHeight(output_.getFrameHeight()); - cpuOutput.setFrameWidth(output_.getFrameWidth()); - - // fc cpu output value do not need convert - cpuOutput.value = output_.value; + if (!nextIsOnlyMKLDNN()) { + convertOutputToOtherDevice(); } // create forward handle prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = - hasBias ? fc_fwd::desc(pk, - inVal_->getMD(), - wgtVal_->getMD(), - biasVal_->getMD(), - outVal_->getMD()) - : fc_fwd::desc( - pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD()); + fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + biasVal_->getMemoryDesc(), + outVal_->getMemoryDesc()) + : fc_fwd::desc(pk, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + outVal_->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); if (hasBias) { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); @@ -184,36 +193,38 @@ void MKLDNNFcLayer::resetBwd() { const MatrixPtr& wgt = weight_->getWGrad(); const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; - // TODO(TJ): merge topdiffs - if (nextIsMKLDNN()) { + // TODO(TJ): merge outgrad + if (nextIsOnlyMKLDNN()) { // can not directly cast outputgrad to mkldnnmatrix, // since each layer can not write the inputgrad to mkldnn inputgrad. 
// So just create from matrix with outputvalue format. const MatrixPtr& out = getOutput(MKLDNN_DEVICE).grad; - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); } else { const MatrixPtr& out = getOutput(CPU_DEVICE).grad; // fc do not need to convert from cpu device since output always nc // only need create from cpu device - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPD()); + outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); } - wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPD()); - biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPD()) : nullptr; + wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPrimitiveDesc()); + biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPrimitiveDesc()) + : nullptr; // create memory primitive desc fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, - inVal_->getMD(), - wgtGrad_->getMD(), - outGrad_->getMD()); + inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = - hasBias ? fc_bwdWgt::desc(inVal_->getMD(), - wgtGrad_->getMD(), - biasGrad_->getMD(), - outGrad_->getMD()) - : fc_bwdWgt::desc( - inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); + fc_bwdWgt::desc bwdWgtDesc = hasBias + ? 
fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + biasGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()) + : fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()); fc_bwdWgt::primitive_desc bwdWgtPD = fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); @@ -227,30 +238,20 @@ void MKLDNNFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdWgt_); /// backward data - if (prevIsMKLDNN()) { - const MatrixPtr& in = getInputGrad(0, MKLDNN_DEVICE); - if (in == nullptr) { - return; - } - if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { - // TODO(TJ): use outputMaps_ ways when merge topdiff done - } else { - inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); - } + int device = prevIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; + const MatrixPtr& in = getInputGrad(0, device); + if (in == nullptr) { + return; + } + if (getInput(0, device).getAllCount() > 1) { + // TODO(TJ): use outputMaps_ ways when merge outgrad done } else { - const MatrixPtr& in = getInputGrad(0, CPU_DEVICE); - if (in == nullptr) { - return; - } - if (getInput(0, CPU_DEVICE).getAllCount() > 1) { - // TODO(TJ): use outputMaps_ ways when merge topdiff done - } else { - inGrad_ = MKLDNNMatrix::create(in, inVal_->getPD()); - } + inGrad_ = MKLDNNMatrix::create(in, inVal_->getPrimitiveDesc()); } - fc_bwdData::desc bwdDataDesc = - fc_bwdData::desc(inVal_->getMD(), wgtGrad_->getMD(), outGrad_->getMD()); + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(inVal_->getMemoryDesc(), + wgtGrad_->getMemoryDesc(), + outGrad_->getMemoryDesc()); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index e2657a8d5e..e138a6faf1 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -72,6 +72,8 @@ protected: * only would be called when needed */ void resetBwd(); + + void 
convertOutputToOtherDevice() override; }; } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 3dd17a36ff..8fe9630e82 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -86,10 +86,7 @@ public: CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." << "Please set WITH_MKLDNN=ON " << "and set use_mkldnn=True"; - if (useGpu_ == true) { - LOG(WARNING) << "Do not support GPU yet, will change to useGpu = false"; - useGpu_ = false; - } + CHECK(!useGpu_) << "Do not support GPU yet"; // set device id before Layer::init setDevice(MKLDNN_DEVICE); @@ -116,6 +113,12 @@ public: */ virtual void convertWeightsToPaddle() {} + /** + * Convert MKLDNN output to other devices. + * Only the CPU device is supported so far. + */ + virtual void convertOutputToOtherDevice() {} + /** * print info about sizes */ @@ -147,22 +150,25 @@ public: protected: /** - * If next layer only has MKLDNN type. - * Otherwise, only support otherdevice CPU device. + * Copy image size and sequence info to the other devices. */ - bool nextIsMKLDNN() { + void copyOutputInfoToOtherDevice() { for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) - << "Only support other device is CPU yet"; + outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight()); + outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth()); + outputOtherDevice_[i].sequenceStartPositions = + output_.sequenceStartPositions; + outputOtherDevice_[i].subSequenceStartPositions = + output_.subSequenceStartPositions; + outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; } - return outputOtherDevice_.size() == 0; } /** - * Is previous layer MKLDNN type. - * Otherwise, only support otherdevice CPU device. + * Whether the previous layer uses the MKLDNN device. + * Otherwise, the previous layer is only supported on the CPU device. 
*/ - bool prevIsMKLDNN(int index = 0) { + bool prevIsOnlyMKLDNN(int index = 0) { int prevDevice = getPrev(index)->getDeviceId(); if (prevDevice == MKLDNN_DEVICE) { return true; @@ -173,11 +179,23 @@ protected: } } + /** + * Whether the output only has the MKLDNN device. + * Otherwise, the other devices should only be CPU devices. + */ + bool nextIsOnlyMKLDNN() { + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) + << "Only support other device is CPU yet"; + } + return outputOtherDevice_.size() == 0; + } + /** * Sync input value data */ void syncInputValue() { - if (prevIsMKLDNN()) { + if (prevIsOnlyMKLDNN()) { return; } real* iData = getInputValue(0, CPU_DEVICE)->getData(); @@ -190,7 +208,7 @@ protected: * Sync output grad data */ void syncOutputGrad() { - if (nextIsMKLDNN()) { + if (nextIsOnlyMKLDNN()) { return; } diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 32ae3b1bcf..0a355e2644 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -31,7 +31,6 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) { if (m == nullptr) { size_t height = dims[0]; size_t width = cnts / dims[0]; - // LOG(INFO) << height << "," << width; m = Matrix::create(height, width, false, false); } @@ -40,10 +39,8 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) { CHECK(cpuMatrix) << "Only support create from CPU matrix yet"; CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match"; - size_t width = m->getWidth(); - size_t height = m->getHeight(); - real* data = m->getData(); - return std::make_shared(data, height, width, pd); + return std::make_shared( + m->getData(), m->getHeight(), m->getWidth(), pd); } MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, @@ -51,9 +48,7 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::format fmt, engine& eg, mkldnn::memory::data_type dtype) { - memory::desc md = 
memory::desc(dims, dtype, fmt); - memory::primitive_desc pd = memory::primitive_desc(md, eg); - return create(m, pd); + return create(m, memory::primitive_desc(memory::desc(dims, dtype, fmt), eg)); } void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, @@ -64,9 +59,7 @@ void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, return; } CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; - real* srcData = getData(); - real* dstData = m->getData(); - reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); + reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim); } void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m, @@ -77,9 +70,7 @@ void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m, return; } CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; - real* srcData = getData(); - real* dstData = m->getData(); - reorderOnce(srcData, dstData, srcFmt, dstFmt, targetDim); + reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim); } void MKLDNNMatrix::reorderOnce(void* srcData, @@ -120,8 +111,9 @@ void MKLDNNMatrix::downSpatial() { return; } - memory::dims srcDims = getDims(); + // TODO(TJ): change H(height) and W(width) if support nhwc or more const int H = 2, W = 3; + memory::dims srcDims = getDims(); if (srcDims[H] != 1 || srcDims[W] != 1) { // can not down spatial return; @@ -141,13 +133,12 @@ void MKLDNNMatrix::downSpatial() { } memory::desc md = memory::desc(dstDims, getDtype(), dstFmt); memory::primitive_desc pd = memory::primitive_desc(md, getEngine()); - void* data = getData(); mkldnn_primitive_t result; mkldnn::error::wrap_c_api( mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr), "could not create a memory primitive"); reset(result); - set_data_handle(data); + set_data_handle(getData()); } } // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index ea3fd7d461..e50f698b49 100644 --- a/paddle/math/MKLDNNMatrix.h +++ 
b/paddle/math/MKLDNNMatrix.h @@ -56,9 +56,9 @@ public: public: /** * Reorder this MKLDNNMatrix from other format. - * Support inplace reorder - * Pay attention: this function would only reorder the data layout. - * will NOT change this original dim or format info + * Support inplace reorder. + * @note: this function would only reorder the data layout. + * will NOT change this original dim or format info */ void reorderDataFrom(const MKLDNNMatrixPtr& m, memory::format srcFmt, @@ -66,9 +66,9 @@ public: /** * Reorder this MKLDNNMatrix to other format. - * Support inplace reorder - * Pay attention: this function would only reorder the data layout. - * will NOT change the dst dim or format info + * Support inplace reorder. + * @note: this function would only reorder the data layout. + * will NOT change the dst dim or format info */ void reorderDataTo(const MKLDNNMatrixPtr& m, memory::format dstFmt, @@ -90,18 +90,20 @@ public: /** * Get primitive descriptor. */ - mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); } + mkldnn::memory::primitive_desc getPrimitiveDesc() { + return this->get_primitive_desc(); + } /** * Get memory descriptor. */ - mkldnn::memory::desc getMD() { return getPD().desc(); } + mkldnn::memory::desc getMemoryDesc() { return getPrimitiveDesc().desc(); } /** * Get dimensions. */ mkldnn::memory::dims getDims() { - mkldnn::memory::desc md = getMD(); + mkldnn::memory::desc md = getMemoryDesc(); const int* src = md.data.dims; int ndims = md.data.ndims; mkldnn::memory::dims dst; @@ -116,24 +118,25 @@ public: * Get format. */ mkldnn::memory::format getFormat() { - return (mkldnn::memory::format)(getMD().data.format); + return (mkldnn::memory::format)(getMemoryDesc().data.format); } /** * Get memory data type. */ mkldnn::memory::data_type getDtype() { - return (mkldnn::memory::data_type)(getMD().data.data_type); + return (mkldnn::memory::data_type)(getMemoryDesc().data.data_type); } /** * Get engine. 
*/ - mkldnn::engine getEngine() { return getPD().get_engine(); } + mkldnn::engine getEngine() { return getPrimitiveDesc().get_engine(); } protected: /** - * Do once reorder supported inplace. + * Do reorder once. + * Can support inplace. */ void reorderOnce(void* srcData, void* dstData, -- GitLab