提交 94ea8ee0 编写于 作者: T tensor-tang

refine MKLDNNLayer logical: move forward and backward to MKLDNNLayer and...

refine MKLDNNLayer logical: move forward and backward to MKLDNNLayer and remove copyOutputInfoToOtherDevice
上级 f40d5f58
...@@ -14,7 +14,6 @@ limitations under the License. */ ...@@ -14,7 +14,6 @@ limitations under the License. */
#include "MKLDNNFcLayer.h" #include "MKLDNNFcLayer.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
using namespace mkldnn; // NOLINT using namespace mkldnn; // NOLINT
typedef memory::format format; typedef memory::format format;
...@@ -40,6 +39,8 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap, ...@@ -40,6 +39,8 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap,
oc_ = getSize(); oc_ = getSize();
oh_ = 1; oh_ = 1;
ow_ = 1; ow_ = 1;
ih_ = 1;
iw_ = 1;
// input size can not change in FC // input size can not change in FC
iLayerSize_ = inputLayers_[0]->getSize(); iLayerSize_ = inputLayers_[0]->getSize();
...@@ -78,36 +79,17 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { ...@@ -78,36 +79,17 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
} }
void MKLDNNFcLayer::reshape() { void MKLDNNFcLayer::reshape() {
const Argument& input = getInput(0, getPrev(0)->getDeviceId()); reshapeInput();
int batchSize = input.getBatchSize();
if (bs_ == batchSize) {
return;
}
bs_ = batchSize;
ih_ = input.getFrameHeight();
iw_ = input.getFrameWidth();
if (ih_ == 0) {
ih_ = 1;
}
if (iw_ == 0) {
iw_ = 1;
}
CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize());
ic_ = iLayerSize_ / (ih_ * iw_); ic_ = iLayerSize_ / (ih_ * iw_);
CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible";
CHECK_EQ(size_t(oc_), getSize()); CHECK_EQ(size_t(oc_), getSize());
printSizeInfo();
// reset output reshapeOutput(oh_, ow_);
output_.setFrameHeight(oh_); resizeOutput(bs_, oc_);
output_.setFrameWidth(ow_);
resetOutput(bs_, oc_);
// reset mkldnn forward printSizeInfo();
resetFwd();
needResetBwd_ = true;
convertWeightsFromPaddle();
} }
void MKLDNNFcLayer::resetFwd() { void MKLDNNFcLayer::resetFwd() {
...@@ -137,7 +119,6 @@ void MKLDNNFcLayer::resetFwd() { ...@@ -137,7 +119,6 @@ void MKLDNNFcLayer::resetFwd() {
// change original output value to mkldnn output value // change original output value to mkldnn output value
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_); output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
if (!outputIsOnlyMKLDNN()) { if (!outputIsOnlyMKLDNN()) {
copyOutputInfoToOtherDevice();
// fc cpu output value do not need create convert // fc cpu output value do not need create convert
// just share point // just share point
getOutput(CPU_DEVICE).value->setData(output_.value->getData()); getOutput(CPU_DEVICE).value->setData(output_.value->getData());
...@@ -243,51 +224,13 @@ void MKLDNNFcLayer::resetBwd() { ...@@ -243,51 +224,13 @@ void MKLDNNFcLayer::resetBwd() {
} }
void MKLDNNFcLayer::updateInputData() { void MKLDNNFcLayer::updateInputData() {
if (inputLayers_[0]->getType() != "data") { inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
return;
}
real* iData = getInputValue(0, CPU_DEVICE)->getData();
inVal_->setData(iData);
}
void MKLDNNFcLayer::forward(PassType passType) {
Layer::forward(passType);
reshape();
{
REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
updateInputData();
// just submit forward pipeline
stream_->submit(pipelineFwd_);
}
/* activation */ {
REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
forwardActivation();
}
} }
void MKLDNNFcLayer::backward(const UpdateCallback& callback) { void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) {
/* Do derivation */ { weight_->getParameterPtr()->incUpdate(callback);
REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); if (biases_ && biases_->getWGrad()) {
backwardActivation(); biases_->getParameterPtr()->incUpdate(callback);
}
{
REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
resetBwd();
// just submit backward pipeline
stream_->submit(pipelineBwd_);
}
{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
weight_->getParameterPtr()->incUpdate(callback);
if (biases_ && biases_->getWGrad()) {
biases_->getParameterPtr()->incUpdate(callback);
}
} }
} }
} // namespace paddle } // namespace paddle
...@@ -45,35 +45,19 @@ public: ...@@ -45,35 +45,19 @@ public:
bool init(const LayerMap& layerMap, bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override; const ParameterMap& parameterMap) override;
void convertWeightsFromPaddle() override; void reshape() override;
void convertWeightsToPaddle() override;
void forward(PassType passType) override; void resetFwd() override;
void backward(const UpdateCallback& callback) override; void resetBwd() override;
void updateInputData() override; void updateInputData() override;
protected: void updateWeights(const UpdateCallback& callback) override;
/**
* reshape the input image sizes void convertWeightsFromPaddle() override;
* and reset output buffer size
* and reset mkldnn forward void convertWeightsToPaddle() override;
*/
void reshape();
/**
* reset the forward primitive and memory
* would only be called when input size changes
*/
void resetFwd();
/**
* reset the backward primitive and memory for mkldnn fc
* would only be called when needed
*/
void resetBwd();
}; };
} // namespace paddle } // namespace paddle
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include "MKLDNNBase.h" #include "MKLDNNBase.h"
#include "mkldnn.hpp" #include "mkldnn.hpp"
#include "paddle/math/MKLDNNMatrix.h" #include "paddle/math/MKLDNNMatrix.h"
#include "paddle/utils/Stat.h"
DECLARE_bool(use_mkldnn); DECLARE_bool(use_mkldnn);
...@@ -33,6 +34,8 @@ typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr; ...@@ -33,6 +34,8 @@ typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr;
*/ */
class MKLDNNLayer : public Layer { class MKLDNNLayer : public Layer {
protected: protected:
// input value element count
size_t inputElemenCnt_;
// batch size // batch size
int bs_; int bs_;
// input image channel, height and width // input image channel, height and width
...@@ -52,7 +55,7 @@ protected: ...@@ -52,7 +55,7 @@ protected:
std::vector<mkldnn::primitive> pipelineFwd_; std::vector<mkldnn::primitive> pipelineFwd_;
std::vector<mkldnn::primitive> pipelineBwd_; std::vector<mkldnn::primitive> pipelineBwd_;
// MKLDNNMatrixPtr // MKLDNNMatrixPtr with internal format
MKLDNNMatrixPtr inVal_; MKLDNNMatrixPtr inVal_;
MKLDNNMatrixPtr inGrad_; MKLDNNMatrixPtr inGrad_;
MKLDNNMatrixPtr outVal_; MKLDNNMatrixPtr outVal_;
...@@ -65,6 +68,7 @@ protected: ...@@ -65,6 +68,7 @@ protected:
public: public:
explicit MKLDNNLayer(const LayerConfig& config) explicit MKLDNNLayer(const LayerConfig& config)
: Layer(config), : Layer(config),
inputElemenCnt_(0),
bs_(0), bs_(0),
ic_(0), ic_(0),
ih_(0), ih_(0),
...@@ -95,12 +99,93 @@ public: ...@@ -95,12 +99,93 @@ public:
if (!Layer::init(layerMap, parameterMap)) { if (!Layer::init(layerMap, parameterMap)) {
return false; return false;
} }
checkCPUOutputsNumber();
stream_.reset(new MKLDNNStream()); stream_.reset(new MKLDNNStream());
engine_ = CPUEngine::Instance().getEngine(); engine_ = CPUEngine::Instance().getEngine();
return true; return true;
} }
void forward(PassType passType) override {
passType_ = passType;
{
REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
copySeqInfoToOutputs();
CHECK(!inputLayers_.empty());
size_t elemenCnt = inputLayers_[0]->getOutput().value->getElementCnt();
if (inputElemenCnt_ != elemenCnt) {
inputElemenCnt_ = elemenCnt;
reshape();
resetFwd();
convertWeightsFromPaddle();
needResetBwd_ = true;
}
if (inputLayers_[0]->getType() == "data") {
updateInputData();
}
stream_->submit(pipelineFwd_);
}
/* activation */ {
REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
forwardActivation();
}
}
void backward(const UpdateCallback& callback) override {
/* Do derivation */ {
REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
backwardActivation();
}
{
REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
if (needResetBwd_) {
resetBwd();
needResetBwd_ = false;
}
stream_->submit(pipelineBwd_);
}
{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
updateWeights(callback);
}
}
/**
* reshape the input image sizes
* and reset output image and buffer size
*/
virtual void reshape() = 0;
/**
* reset the mkldnn forward primitive and memory
* would only be called when input size changes
*/
virtual void resetFwd() = 0;
/**
* reset the mkldnn backward primitive and memory for mkldnn fc
* would only be called when needed
*/
virtual void resetBwd() = 0;
/**
* Update input value data when the input layer is of "data" type,
* since the input value data address might have changed.
*/
virtual void updateInputData() {}
/**
* Update weights and biases if necessary.
*/
virtual void updateWeights(const UpdateCallback& callback) {}
/** /**
* convert weight from paddle format to mkldnn format * convert weight from paddle format to mkldnn format
* weight_ will be overridden * weight_ will be overridden
...@@ -114,10 +199,38 @@ public: ...@@ -114,10 +199,38 @@ public:
virtual void convertWeightsToPaddle() {} virtual void convertWeightsToPaddle() {}
/** /**
* Update input value data when input layer is "data" type. * add this interface as public for unit test
* Since the input value data address might be changed.
*/ */
virtual void updateInputData() {} void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); }
protected:
/**
* reshape the input image sizes and input batchsize
*/
virtual void reshapeInput() {
const Argument& input = inputLayers_[0]->getOutput();
bs_ = input.getBatchSize();
int height = input.getFrameHeight();
int width = input.getFrameWidth();
if (height != 0) {
ih_ = height;
}
if (width != 0) {
iw_ = width;
}
}
/**
* reshape output image sizes
*/
virtual void reshapeOutput(size_t height, size_t width) {
output_.setFrameHeight(height);
output_.setFrameWidth(width);
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
outputOtherDevice_[i].setFrameHeight(height);
outputOtherDevice_[i].setFrameWidth(width);
}
}
/** /**
* print info about sizes * print info about sizes
...@@ -133,8 +246,8 @@ public: ...@@ -133,8 +246,8 @@ public:
*/ */
virtual void printValueFormatFlow() { virtual void printValueFormatFlow() {
if (inVal_ && outVal_) { if (inVal_ && outVal_) {
VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat() VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>> "
<< " >>> " << outVal_->getFormat(); << outVal_->getFormat();
} }
} }
...@@ -143,36 +256,12 @@ public: ...@@ -143,36 +256,12 @@ public:
*/ */
virtual void printGradFormatFlow() { virtual void printGradFormatFlow() {
if (inGrad_ && outGrad_) { if (inGrad_ && outGrad_) {
VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat() VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<< "
<< " <<< " << outGrad_->getFormat(); << outGrad_->getFormat();
} }
} }
protected: protected:
/**
* copy image size and sequence info to other device
* @note: can not directly use Layer::copyOutputToOtherDevice since here only
* copy base info and do not copy data value
*/
void copyOutputInfoToOtherDevice() {
int cnt = 0;
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight());
outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth());
outputOtherDevice_[i].sequenceStartPositions =
output_.sequenceStartPositions;
outputOtherDevice_[i].subSequenceStartPositions =
output_.subSequenceStartPositions;
outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
++cnt;
}
}
if (cnt > 1) {
LOG(WARNING) << "should not have more than one CPU devie";
}
}
/** /**
* If input only has MKLDNN device. * If input only has MKLDNN device.
* Otherwise, only support the previous layer using CPU device. * Otherwise, only support the previous layer using CPU device.
...@@ -205,6 +294,7 @@ protected: ...@@ -205,6 +294,7 @@ protected:
*/ */
void setDevice(int id) { deviceId_ = id; } void setDevice(int id) { deviceId_ = id; }
private:
/** /**
* Set deviceId of the params used in this layer. * Set deviceId of the params used in this layer.
*/ */
...@@ -228,6 +318,42 @@ protected: ...@@ -228,6 +318,42 @@ protected:
parameter->setDevice(id); parameter->setDevice(id);
} }
} }
/**
* Check the number of CPU devices in outputOtherDevice_.
* There should be at most one.
*/
void checkCPUOutputsNumber(int max = 1) {
int cnt = 0;
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
++cnt;
}
}
CHECK_LE(cnt, max) << "too much CPU devies";
}
/**
* copy SeqInfo from input layer to this output and other output devices.
* @note: do not use getInput(0) since it uses this deviceId_,
* use "inputLayers_[0]->getOutput()" instead.
*/
void copySeqInfoToOutputs() {
if (inputLayers_.empty() || !needSequenceInfo_) {
return;
}
const Argument& input = inputLayers_[0]->getOutput();
output_.sequenceStartPositions = input.sequenceStartPositions;
output_.subSequenceStartPositions = input.subSequenceStartPositions;
output_.cpuSequenceDims = input.cpuSequenceDims;
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
outputOtherDevice_[i].sequenceStartPositions =
output_.sequenceStartPositions;
outputOtherDevice_[i].subSequenceStartPositions =
output_.subSequenceStartPositions;
outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
}
}
}; };
} // namespace paddle } // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册