From 94ea8ee0e5e9f079cffb87f756d3274f522066e9 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 11 Sep 2017 16:23:20 +0800 Subject: [PATCH] refine MKLDNNLayer logical: move forward and backward to MKLDNNLayer and remove copyOutputInfoToOtherDevice --- paddle/gserver/layers/MKLDNNFcLayer.cpp | 81 ++-------- paddle/gserver/layers/MKLDNNFcLayer.h | 32 +--- paddle/gserver/layers/MKLDNNLayer.h | 190 ++++++++++++++++++++---- 3 files changed, 178 insertions(+), 125 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 53433cef35..a47967b3d3 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -14,7 +14,6 @@ limitations under the License. */ #include "MKLDNNFcLayer.h" #include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" using namespace mkldnn; // NOLINT typedef memory::format format; @@ -40,6 +39,8 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap, oc_ = getSize(); oh_ = 1; ow_ = 1; + ih_ = 1; + iw_ = 1; // input size can not change in FC iLayerSize_ = inputLayers_[0]->getSize(); @@ -78,36 +79,17 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { } void MKLDNNFcLayer::reshape() { - const Argument& input = getInput(0, getPrev(0)->getDeviceId()); - int batchSize = input.getBatchSize(); - if (bs_ == batchSize) { - return; - } - bs_ = batchSize; - ih_ = input.getFrameHeight(); - iw_ = input.getFrameWidth(); - if (ih_ == 0) { - ih_ = 1; - } - if (iw_ == 0) { - iw_ = 1; - } + reshapeInput(); + CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); ic_ = iLayerSize_ / (ih_ * iw_); CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; CHECK_EQ(size_t(oc_), getSize()); - printSizeInfo(); - // reset output - output_.setFrameHeight(oh_); - output_.setFrameWidth(ow_); - resetOutput(bs_, oc_); + reshapeOutput(oh_, ow_); + resizeOutput(bs_, oc_); - // reset mkldnn forward - resetFwd(); - needResetBwd_ = true; - - convertWeightsFromPaddle(); + printSizeInfo(); } void MKLDNNFcLayer::resetFwd() { @@ -137,7 +119,6 @@ void MKLDNNFcLayer::resetFwd() { // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(outVal_); if (!outputIsOnlyMKLDNN()) { - copyOutputInfoToOtherDevice(); // fc cpu output value do not need create convert // just share point getOutput(CPU_DEVICE).value->setData(output_.value->getData()); @@ -243,51 +224,13 @@ void MKLDNNFcLayer::resetBwd() { } void MKLDNNFcLayer::updateInputData() { - if (inputLayers_[0]->getType() != "data") { - return; - } - real* iData = getInputValue(0, CPU_DEVICE)->getData(); - inVal_->setData(iData); -} - -void MKLDNNFcLayer::forward(PassType passType) { - Layer::forward(passType); - reshape(); - - { - REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); - updateInputData(); - - // just submit forward pipeline - stream_->submit(pipelineFwd_); - } - - /* activation */ { - REGISTER_TIMER_INFO("FwActTimer", getName().c_str()); - forwardActivation(); - } + inVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); } -void MKLDNNFcLayer::backward(const UpdateCallback& callback) { - /* Do derivation */ { - REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); - backwardActivation(); - } - - { - REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); - resetBwd(); - - // just sumbmit backward pipeline - stream_->submit(pipelineBwd_); - } - - { - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - weight_->getParameterPtr()->incUpdate(callback); - if (biases_ && biases_->getWGrad()) { - biases_->getParameterPtr()->incUpdate(callback); - } +void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); } } } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index 4ad67a16e0..add8ac9991 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -45,35 +45,19 @@ public: bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; - void convertWeightsFromPaddle() override; - - void convertWeightsToPaddle() override; + void reshape() override; - void forward(PassType passType) override; + void resetFwd() override; - void backward(const UpdateCallback& callback) override; + void resetBwd() override; void updateInputData() override; -protected: - /** - * reshape the input image sizes - * and reset output buffer size - * and reset mkldnn forward - */ - void reshape(); - - /** - * reset the forward primitve and memory - * only would be called when input size changes - */ - void resetFwd(); - - /** - * reset the backward primitve and memory for mkldnn fc - * only would be called when needed - */ - void resetBwd(); + void updateWeights(const UpdateCallback& callback) override; + + void convertWeightsFromPaddle() override; + + void convertWeightsToPaddle() override; }; } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 543364edce..c10f2fec2f 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -19,6 +19,7 @@ limitations under the License. */ #include "MKLDNNBase.h" #include "mkldnn.hpp" #include "paddle/math/MKLDNNMatrix.h" +#include "paddle/utils/Stat.h" DECLARE_bool(use_mkldnn); @@ -33,6 +34,8 @@ typedef std::shared_ptr MKLDNNLayerPtr; */ class MKLDNNLayer : public Layer { protected: + // input value element count + size_t inputElemenCnt_; // batch size int bs_; // input image channel, height and width @@ -52,7 +55,7 @@ protected: std::vector pipelineFwd_; std::vector pipelineBwd_; - // MKLDNNMatrixPtr + // MKLDNNMatrixPtr with internal format MKLDNNMatrixPtr inVal_; MKLDNNMatrixPtr inGrad_; MKLDNNMatrixPtr outVal_; @@ -65,6 +68,7 @@ protected: public: explicit MKLDNNLayer(const LayerConfig& config) : Layer(config), + inputElemenCnt_(0), bs_(0), ic_(0), ih_(0), @@ -95,12 +99,93 @@ public: if (!Layer::init(layerMap, parameterMap)) { return false; } + checkCPUOutputsNumber(); stream_.reset(new MKLDNNStream()); engine_ = CPUEngine::Instance().getEngine(); return true; } + void forward(PassType passType) override { + passType_ = passType; + + { + REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); + copySeqInfoToOutputs(); + CHECK(!inputLayers_.empty()); + size_t elemenCnt = inputLayers_[0]->getOutput().value->getElementCnt(); + if (inputElemenCnt_ != elemenCnt) { + inputElemenCnt_ = elemenCnt; + reshape(); + resetFwd(); + convertWeightsFromPaddle(); + needResetBwd_ = true; + } + + if (inputLayers_[0]->getType() == "data") { + updateInputData(); + } + + stream_->submit(pipelineFwd_); + } + + /* activation */ { + REGISTER_TIMER_INFO("FwActTimer", getName().c_str()); + forwardActivation(); + } + } + + void backward(const UpdateCallback& callback) override { + /* Do derivation */ { + REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); + backwardActivation(); + } + + { + REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); + if (needResetBwd_) { + resetBwd(); + needResetBwd_ = false; + } + + stream_->submit(pipelineBwd_); + } + + { + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + updateWeights(callback); + } + } + + /** + * reshape the input image sizes + * and reset output image and buffer size + */ + virtual void reshape() = 0; + + /** + * reset the mkldnn forward primitve and memory + * only would be called when input size changes + */ + virtual void resetFwd() = 0; + + /** + * reset the mkldnn backward primitve and memory for mkldnn fc + * only would be called when needed + */ + virtual void resetBwd() = 0; + + /** + * Update input value data when input layer is "data" type. + * Since the input value data address might be changed. + */ + virtual void updateInputData() {} + + /** + * Update weights and biases if necessary. + */ + virtual void updateWeights(const UpdateCallback& callback) {} + /** * convert weight from paddle format to mkldnn format * weight_ will be override @@ -114,10 +199,38 @@ public: virtual void convertWeightsToPaddle() {} /** - * Update input value data when input layer is "data" type. - * Since the input value data address might be changed. + * add this interface as public for unit test */ - virtual void updateInputData() {} + void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); } + +protected: + /** + * reshape the input image sizes and input batchsize + */ + virtual void reshapeInput() { + const Argument& input = inputLayers_[0]->getOutput(); + bs_ = input.getBatchSize(); + int height = input.getFrameHeight(); + int width = input.getFrameWidth(); + if (height != 0) { + ih_ = height; + } + if (width != 0) { + iw_ = width; + } + } + + /** + * reshape output image sizes + */ + virtual void reshapeOutput(size_t height, size_t width) { + output_.setFrameHeight(height); + output_.setFrameWidth(width); + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + outputOtherDevice_[i].setFrameHeight(height); + outputOtherDevice_[i].setFrameWidth(width); + } + } /** * print info about sizes @@ -133,8 +246,8 @@ public: */ virtual void printValueFormatFlow() { if (inVal_ && outVal_) { - VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat() - << " >>> " << outVal_->getFormat(); + VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>> " + << outVal_->getFormat(); } } @@ -143,36 +256,12 @@ public: */ virtual void printGradFormatFlow() { if (inGrad_ && outGrad_) { - VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat() - << " <<< " << outGrad_->getFormat(); + VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<< " + << outGrad_->getFormat(); } } protected: - /** - * copy image size and sequence info to other device - * @note: can not directly use Layer::copyOutputToOtherDevice since here only - * copy base info and do not copy data value - */ - void copyOutputInfoToOtherDevice() { - int cnt = 0; - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight()); - outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth()); - outputOtherDevice_[i].sequenceStartPositions = - output_.sequenceStartPositions; - outputOtherDevice_[i].subSequenceStartPositions = - output_.subSequenceStartPositions; - outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; - if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { - ++cnt; - } - } - if (cnt > 1) { - LOG(WARNING) << "should not have more than one CPU devie"; - } - } - /** * If input only has MKLDNN device. * Otherwise, only support the previous layer using CPU device. @@ -205,6 +294,7 @@ protected: */ void setDevice(int id) { deviceId_ = id; } +private: /** * Set deviceId of the params used in this layer. */ @@ -228,6 +318,42 @@ protected: parameter->setDevice(id); } } + + /** + * Check the cpu device number of outputOtherDevice_. + * should have only one at most. + */ + void checkCPUOutputsNumber(int max = 1) { + int cnt = 0; + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { + ++cnt; + } + } + CHECK_LE(cnt, max) << "too much CPU devies"; + } + + /** + * copy SeqInfo from input layer to this output and other output devices. + * @note: do not use getInput(0) since it used this deviceId_, + * use "inputLayers_[0]->getOutput()" instead. + */ + void copySeqInfoToOutputs() { + if (inputLayers_.empty() || !needSequenceInfo_) { + return; + } + const Argument& input = inputLayers_[0]->getOutput(); + output_.sequenceStartPositions = input.sequenceStartPositions; + output_.subSequenceStartPositions = input.subSequenceStartPositions; + output_.cpuSequenceDims = input.cpuSequenceDims; + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + outputOtherDevice_[i].sequenceStartPositions = + output_.sequenceStartPositions; + outputOtherDevice_[i].subSequenceStartPositions = + output_.subSequenceStartPositions; + outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; + } + } }; } // namespace paddle -- GitLab