提交 171fee2c 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #4011 from tensor-tang/refine

Refine MKLDNNLayer and MKLDNNTester
...@@ -49,6 +49,12 @@ struct LayerState { ...@@ -49,6 +49,12 @@ struct LayerState {
}; };
typedef std::shared_ptr<LayerState> LayerStatePtr; typedef std::shared_ptr<LayerState> LayerStatePtr;
/// Paddle device ID, MKLDNN is -2, CPU is -1
enum PADDLE_DEVICE_ID {
MKLDNN_DEVICE = -2,
CPU_DEVICE = -1,
};
/** /**
* @brief Base class for layer. * @brief Base class for layer.
* Define necessary variables and functions for every layer. * Define necessary variables and functions for every layer.
...@@ -59,11 +65,6 @@ protected: ...@@ -59,11 +65,6 @@ protected:
LayerConfig config_; LayerConfig config_;
/// whether to use GPU /// whether to use GPU
bool useGpu_; bool useGpu_;
/// Paddle device ID, MKLDNN is -2, CPU is -1
enum PADDLE_DEVICE_ID {
MKLDNN_DEVICE = -2,
CPU_DEVICE = -1,
};
/// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ... /// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ...
int deviceId_; int deviceId_;
/// Input layers /// Input layers
......
...@@ -14,7 +14,6 @@ limitations under the License. */ ...@@ -14,7 +14,6 @@ limitations under the License. */
#include "MKLDNNFcLayer.h" #include "MKLDNNFcLayer.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
using namespace mkldnn; // NOLINT using namespace mkldnn; // NOLINT
typedef memory::format format; typedef memory::format format;
...@@ -40,6 +39,8 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap, ...@@ -40,6 +39,8 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap,
oc_ = getSize(); oc_ = getSize();
oh_ = 1; oh_ = 1;
ow_ = 1; ow_ = 1;
ih_ = 1;
iw_ = 1;
// input size can not change in FC // input size can not change in FC
iLayerSize_ = inputLayers_[0]->getSize(); iLayerSize_ = inputLayers_[0]->getSize();
...@@ -77,67 +78,53 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { ...@@ -77,67 +78,53 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim);
} }
void MKLDNNFcLayer::reshape() { void MKLDNNFcLayer::reshape(
const Argument& input = getInput(0, getPrev(0)->getDeviceId()); int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
int batchSize = input.getBatchSize(); reshapeInput(bs, ih, iw);
if (bs_ == batchSize) {
return;
}
bs_ = batchSize;
ih_ = input.getFrameHeight();
iw_ = input.getFrameWidth();
if (ih_ == 0) {
ih_ = 1;
}
if (iw_ == 0) {
iw_ = 1;
}
CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize());
ic_ = iLayerSize_ / (ih_ * iw_);
CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible";
CHECK_EQ(size_t(oc_), getSize());
printSizeInfo();
// reset output CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize());
output_.setFrameHeight(oh_); ic = iLayerSize_ / (ih * iw);
output_.setFrameWidth(ow_); CHECK_EQ(size_t(ic * ih * iw), iLayerSize_) << "not divisible";
resetOutput(bs_, oc_); CHECK_EQ(size_t(oc), getSize());
// reset mkldnn forward reshapeOutput(oh, ow);
resetFwd(); resizeOutput(bs, oc);
needResetBwd_ = true;
convertWeightsFromPaddle(); printSizeInfo();
} }
void MKLDNNFcLayer::resetFwd() { void MKLDNNFcLayer::resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
pipeline.clear();
bool hasBias = biases_ && biases_->getW(); bool hasBias = biases_ && biases_->getW();
const MatrixPtr& wgt = weight_->getW(); const MatrixPtr& wgtVal = weight_->getW();
const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; const MatrixPtr& biasVal = hasBias ? biases_->getW() : nullptr;
const MatrixPtr& out = output_.value; const MatrixPtr& outVal = output_.value;
if (inputIsOnlyMKLDNN()) { if (inputIsOnlyMKLDNN()) {
const MatrixPtr& in = getInputValue(0); const MatrixPtr& inVal = getInputValue(0);
inVal_ = std::dynamic_pointer_cast<MKLDNNMatrix>(in); in = std::dynamic_pointer_cast<MKLDNNMatrix>(inVal);
CHECK(inVal_) << "Input should be MKLDNNMatrix"; CHECK(in) << "Input should be MKLDNNMatrix";
} else { } else {
CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet"; CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet";
const MatrixPtr& in = getInputValue(0, CPU_DEVICE); const MatrixPtr& inVal = getInputValue(0, CPU_DEVICE);
inVal_ = MKLDNNMatrix::create( in = MKLDNNMatrix::create(
in, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_); inVal, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_);
} }
inVal_->downSpatial(); in->downSpatial();
wgtVal_ = MKLDNNMatrix::create( wgt = MKLDNNMatrix::create(
wgt, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_); wgtVal, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_);
wgtVal_->downSpatial(); wgt->downSpatial();
biasVal_ = bias = hasBias ? MKLDNNMatrix::create(biasVal, {oc_}, format::x, engine_)
hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; : nullptr;
outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); out = MKLDNNMatrix::create(outVal, {bs_, oc_}, format::nc, engine_);
// change original output value to mkldnn output value // change original output value to mkldnn output value
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_); output_.value = std::dynamic_pointer_cast<Matrix>(out);
if (!outputIsOnlyMKLDNN()) { if (!outputIsOnlyMKLDNN()) {
copyOutputInfoToOtherDevice();
// fc cpu output value do not need create convert // fc cpu output value do not need create convert
// just share point // just share point
getOutput(CPU_DEVICE).value->setData(output_.value->getData()); getOutput(CPU_DEVICE).value->setData(output_.value->getData());
...@@ -146,27 +133,31 @@ void MKLDNNFcLayer::resetFwd() { ...@@ -146,27 +133,31 @@ void MKLDNNFcLayer::resetFwd() {
// create forward handle // create forward handle
prop_kind pk = prop_kind::forward; prop_kind pk = prop_kind::forward;
fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk, fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk,
inVal_->getMemoryDesc(), in->getMemoryDesc(),
wgtVal_->getMemoryDesc(), wgt->getMemoryDesc(),
biasVal_->getMemoryDesc(), bias->getMemoryDesc(),
outVal_->getMemoryDesc()) out->getMemoryDesc())
: fc_fwd::desc(pk, : fc_fwd::desc(pk,
inVal_->getMemoryDesc(), in->getMemoryDesc(),
wgtVal_->getMemoryDesc(), wgt->getMemoryDesc(),
outVal_->getMemoryDesc()); out->getMemoryDesc());
fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
if (hasBias) { if (hasBias) {
fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *bias, *out));
} else { } else {
fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *out));
} }
printValueFormatFlow(); printValueFormatFlow();
pipelineFwd_.clear(); pipeline.push_back(*fwd_);
pipelineFwd_.push_back(*fwd_);
} }
void MKLDNNFcLayer::resetBwd() { void MKLDNNFcLayer::resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
pipeline.clear();
if (!needResetBwd_) { if (!needResetBwd_) {
return; return;
} }
...@@ -175,8 +166,8 @@ void MKLDNNFcLayer::resetBwd() { ...@@ -175,8 +166,8 @@ void MKLDNNFcLayer::resetBwd() {
/// backward weight /// backward weight
CHECK(inVal_) << "Should have input value"; CHECK(inVal_) << "Should have input value";
const MatrixPtr& wgt = weight_->getWGrad(); const MatrixPtr& wgtGrad = weight_->getWGrad();
const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; const MatrixPtr& biasGrad = hasBias ? biases_->getWGrad() : nullptr;
// TODO(TJ): merge outgrad // TODO(TJ): merge outgrad
int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
...@@ -187,107 +178,66 @@ void MKLDNNFcLayer::resetBwd() { ...@@ -187,107 +178,66 @@ void MKLDNNFcLayer::resetBwd() {
// for CPU device: // for CPU device:
// fc do not need to convert from cpu device since output is always nc format // fc do not need to convert from cpu device since output is always nc format
// only need create from cpu device // only need create from cpu device
const MatrixPtr& out = getOutput(device).grad; const MatrixPtr& outGrad = getOutput(device).grad;
outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); out = MKLDNNMatrix::create(outGrad, outVal_->getPrimitiveDesc());
wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPrimitiveDesc()); wgt = MKLDNNMatrix::create(wgtGrad, wgtVal_->getPrimitiveDesc());
biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPrimitiveDesc()) bias = hasBias ? MKLDNNMatrix::create(biasGrad, biasVal_->getPrimitiveDesc())
: nullptr; : nullptr;
// create memory primitive desc // create memory primitive desc
fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward,
inVal_->getMemoryDesc(), inVal_->getMemoryDesc(),
wgtGrad_->getMemoryDesc(), wgt->getMemoryDesc(),
outGrad_->getMemoryDesc()); out->getMemoryDesc());
fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
fc_bwdWgt::desc bwdWgtDesc = hasBias fc_bwdWgt::desc bwdWgtDesc = hasBias
? fc_bwdWgt::desc(inVal_->getMemoryDesc(), ? fc_bwdWgt::desc(inVal_->getMemoryDesc(),
wgtGrad_->getMemoryDesc(), wgt->getMemoryDesc(),
biasGrad_->getMemoryDesc(), bias->getMemoryDesc(),
outGrad_->getMemoryDesc()) out->getMemoryDesc())
: fc_bwdWgt::desc(inVal_->getMemoryDesc(), : fc_bwdWgt::desc(inVal_->getMemoryDesc(),
wgtGrad_->getMemoryDesc(), wgt->getMemoryDesc(),
outGrad_->getMemoryDesc()); out->getMemoryDesc());
fc_bwdWgt::primitive_desc bwdWgtPD = fc_bwdWgt::primitive_desc bwdWgtPD =
fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD);
if (hasBias) { if (hasBias) {
bwdWgt_.reset( bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt, *bias));
new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_));
} else { } else {
bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt));
} }
pipelineBwd_.clear(); pipeline.push_back(*bwdWgt_);
pipelineBwd_.push_back(*bwdWgt_);
/// backward data /// backward data
const MatrixPtr& in = inputLayers_[0]->getOutput().grad; const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
if (in == nullptr) { if (inGrad == nullptr) {
return; return;
} }
if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) {
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
} else { } else {
inGrad_ = MKLDNNMatrix::create(in, inVal_->getPrimitiveDesc()); in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc());
} }
fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(inVal_->getMemoryDesc(), fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(
wgtGrad_->getMemoryDesc(), inVal_->getMemoryDesc(), wgt->getMemoryDesc(), out->getMemoryDesc());
outGrad_->getMemoryDesc());
fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc bwdDataPD =
fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD);
CHECK(wgtVal_) << "Should have weight memory"; CHECK(wgtVal_) << "Should have weight memory";
bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); bwdData_.reset(new fc_bwdData(bwdDataPD, *out, *wgtVal_, *in));
printGradFormatFlow(); printGradFormatFlow();
pipelineBwd_.push_back(*bwdData_); pipeline.push_back(*bwdData_);
} }
void MKLDNNFcLayer::updateInputData() { void MKLDNNFcLayer::updateInputData() {
if (inputLayers_[0]->getType() != "data") { inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
return;
}
real* iData = getInputValue(0, CPU_DEVICE)->getData();
inVal_->setData(iData);
} }
void MKLDNNFcLayer::forward(PassType passType) { void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) {
Layer::forward(passType); weight_->getParameterPtr()->incUpdate(callback);
reshape(); if (biases_ && biases_->getWGrad()) {
biases_->getParameterPtr()->incUpdate(callback);
{
REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
updateInputData();
// just submit forward pipeline
stream_->submit(pipelineFwd_);
}
/* activation */ {
REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
forwardActivation();
}
}
void MKLDNNFcLayer::backward(const UpdateCallback& callback) {
/* Do derivation */ {
REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
backwardActivation();
}
{
REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
resetBwd();
// just sumbmit backward pipeline
stream_->submit(pipelineBwd_);
}
{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
weight_->getParameterPtr()->incUpdate(callback);
if (biases_ && biases_->getWGrad()) {
biases_->getParameterPtr()->incUpdate(callback);
}
} }
} }
} // namespace paddle } // namespace paddle
...@@ -45,35 +45,28 @@ public: ...@@ -45,35 +45,28 @@ public:
bool init(const LayerMap& layerMap, bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override; const ParameterMap& parameterMap) override;
void convertWeightsFromPaddle() override; void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
void convertWeightsToPaddle() override;
void forward(PassType passType) override; void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override;
void backward(const UpdateCallback& callback) override; void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override;
void updateInputData() override; void updateInputData() override;
protected: void updateWeights(const UpdateCallback& callback) override;
/**
* reshape the input image sizes void convertWeightsFromPaddle() override;
* and reset output buffer size
* and reset mkldnn forward void convertWeightsToPaddle() override;
*/
void reshape();
/**
* reset the forward primitve and memory
* only would be called when input size changes
*/
void resetFwd();
/**
* reset the backward primitve and memory for mkldnn fc
* only would be called when needed
*/
void resetBwd();
}; };
} // namespace paddle } // namespace paddle
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include "MKLDNNBase.h" #include "MKLDNNBase.h"
#include "mkldnn.hpp" #include "mkldnn.hpp"
#include "paddle/math/MKLDNNMatrix.h" #include "paddle/math/MKLDNNMatrix.h"
#include "paddle/utils/Stat.h"
DECLARE_bool(use_mkldnn); DECLARE_bool(use_mkldnn);
...@@ -33,6 +34,8 @@ typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr; ...@@ -33,6 +34,8 @@ typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr;
*/ */
class MKLDNNLayer : public Layer { class MKLDNNLayer : public Layer {
protected: protected:
// input value element count
size_t inputElemenCnt_;
// batch size // batch size
int bs_; int bs_;
// input image channel, height and width // input image channel, height and width
...@@ -52,7 +55,7 @@ protected: ...@@ -52,7 +55,7 @@ protected:
std::vector<mkldnn::primitive> pipelineFwd_; std::vector<mkldnn::primitive> pipelineFwd_;
std::vector<mkldnn::primitive> pipelineBwd_; std::vector<mkldnn::primitive> pipelineBwd_;
// MKLDNNMatrixPtr // MKLDNNMatrixPtr with internal format
MKLDNNMatrixPtr inVal_; MKLDNNMatrixPtr inVal_;
MKLDNNMatrixPtr inGrad_; MKLDNNMatrixPtr inGrad_;
MKLDNNMatrixPtr outVal_; MKLDNNMatrixPtr outVal_;
...@@ -65,6 +68,7 @@ protected: ...@@ -65,6 +68,7 @@ protected:
public: public:
explicit MKLDNNLayer(const LayerConfig& config) explicit MKLDNNLayer(const LayerConfig& config)
: Layer(config), : Layer(config),
inputElemenCnt_(0),
bs_(0), bs_(0),
ic_(0), ic_(0),
ih_(0), ih_(0),
...@@ -95,12 +99,104 @@ public: ...@@ -95,12 +99,104 @@ public:
if (!Layer::init(layerMap, parameterMap)) { if (!Layer::init(layerMap, parameterMap)) {
return false; return false;
} }
checkCPUOutputsNumber();
stream_.reset(new MKLDNNStream()); stream_.reset(new MKLDNNStream());
engine_ = CPUEngine::Instance().getEngine(); engine_ = CPUEngine::Instance().getEngine();
return true; return true;
} }
void forward(PassType passType) override {
passType_ = passType;
{
REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
CHECK(!inputLayers_.empty());
copySeqInfoToOutputs();
size_t elemenCnt = inputLayers_[0]->getOutput().value->getElementCnt();
if (inputElemenCnt_ != elemenCnt) {
// reset when input total sizes changed, not only the batchsize
inputElemenCnt_ = elemenCnt;
reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
convertWeightsFromPaddle();
needResetBwd_ = true;
}
if (inputLayers_[0]->getType() == "data") {
updateInputData();
}
stream_->submit(pipelineFwd_);
}
/* activation */ {
REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
forwardActivation();
}
}
void backward(const UpdateCallback& callback) override {
/* Do derivation */ {
REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
backwardActivation();
}
{
REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
if (needResetBwd_) {
resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
needResetBwd_ = false;
}
stream_->submit(pipelineBwd_);
}
{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
updateWeights(callback);
}
}
/**
* reshape the input image sizes
* and reset output image and buffer size
* output channel can not be changed
*/
virtual void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) = 0;
/**
* reset the mkldnn forward primitve and memory
* only would be called when input size changes
*/
virtual void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) = 0;
/**
* reset the mkldnn backward primitve and memory for mkldnn fc
* only would be called when needed
*/
virtual void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) = 0;
/**
* Update input value data when input layer is "data" type.
* Since the input value data address might be changed.
*/
virtual void updateInputData() {}
/**
* Update weights and biases if necessary.
*/
virtual void updateWeights(const UpdateCallback& callback) {}
/** /**
* convert weight from paddle format to mkldnn format * convert weight from paddle format to mkldnn format
* weight_ will be override * weight_ will be override
...@@ -114,10 +210,38 @@ public: ...@@ -114,10 +210,38 @@ public:
virtual void convertWeightsToPaddle() {} virtual void convertWeightsToPaddle() {}
/** /**
* Update input value data when input layer is "data" type. * add this interface as public for unit test
* Since the input value data address might be changed.
*/ */
virtual void updateInputData() {} void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); }
protected:
/**
* reshape the input image sizes and input batchsize
*/
virtual void reshapeInput(int& batchsize, int& height, int& width) {
const Argument& input = inputLayers_[0]->getOutput();
batchsize = input.getBatchSize();
int h = input.getFrameHeight();
int w = input.getFrameWidth();
if (h != 0) {
height = h;
}
if (w != 0) {
width = w;
}
}
/**
* reshape output image sizes
*/
virtual void reshapeOutput(size_t height, size_t width) {
output_.setFrameHeight(height);
output_.setFrameWidth(width);
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
outputOtherDevice_[i].setFrameHeight(height);
outputOtherDevice_[i].setFrameWidth(width);
}
}
/** /**
* print info about sizes * print info about sizes
...@@ -133,8 +257,8 @@ public: ...@@ -133,8 +257,8 @@ public:
*/ */
virtual void printValueFormatFlow() { virtual void printValueFormatFlow() {
if (inVal_ && outVal_) { if (inVal_ && outVal_) {
VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat() VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>> "
<< " >>> " << outVal_->getFormat(); << outVal_->getFormat();
} }
} }
...@@ -143,36 +267,12 @@ public: ...@@ -143,36 +267,12 @@ public:
*/ */
virtual void printGradFormatFlow() { virtual void printGradFormatFlow() {
if (inGrad_ && outGrad_) { if (inGrad_ && outGrad_) {
VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat() VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<< "
<< " <<< " << outGrad_->getFormat(); << outGrad_->getFormat();
} }
} }
protected: protected:
/**
* copy image size and sequence info to other device
* @note: can not directly use Layer::copyOutputToOtherDevice since here only
* copy base info and do not copy data value
*/
void copyOutputInfoToOtherDevice() {
int cnt = 0;
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight());
outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth());
outputOtherDevice_[i].sequenceStartPositions =
output_.sequenceStartPositions;
outputOtherDevice_[i].subSequenceStartPositions =
output_.subSequenceStartPositions;
outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
++cnt;
}
}
if (cnt > 1) {
LOG(WARNING) << "should not have more than one CPU devie";
}
}
/** /**
* If input only has MKLDNN device. * If input only has MKLDNN device.
* Otherwise, only support the previous layer using CPU device. * Otherwise, only support the previous layer using CPU device.
...@@ -205,6 +305,7 @@ protected: ...@@ -205,6 +305,7 @@ protected:
*/ */
void setDevice(int id) { deviceId_ = id; } void setDevice(int id) { deviceId_ = id; }
private:
/** /**
* Set deviceId of the params used in this layer. * Set deviceId of the params used in this layer.
*/ */
...@@ -228,6 +329,42 @@ protected: ...@@ -228,6 +329,42 @@ protected:
parameter->setDevice(id); parameter->setDevice(id);
} }
} }
/**
* Check the cpu device number of outputOtherDevice_.
* should have only one at most.
*/
void checkCPUOutputsNumber(int max = 1) {
int cnt = 0;
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
++cnt;
}
}
CHECK_LE(cnt, max) << "too much CPU devies";
}
/**
* copy SeqInfo from input layer to this output and other output devices.
* @note: do not use getInput(0) since it used this deviceId_,
* use "inputLayers_[0]->getOutput()" instead.
*/
void copySeqInfoToOutputs() {
if (inputLayers_.empty() || !needSequenceInfo_) {
return;
}
const Argument& input = inputLayers_[0]->getOutput();
output_.sequenceStartPositions = input.sequenceStartPositions;
output_.subSequenceStartPositions = input.subSequenceStartPositions;
output_.cpuSequenceDims = input.cpuSequenceDims;
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
outputOtherDevice_[i].sequenceStartPositions =
output_.sequenceStartPositions;
outputOtherDevice_[i].subSequenceStartPositions =
output_.subSequenceStartPositions;
outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
}
}
}; };
} // namespace paddle } // namespace paddle
...@@ -63,8 +63,12 @@ void MKLDNNTester::reset(const TestConfig& dnn, ...@@ -63,8 +63,12 @@ void MKLDNNTester::reset(const TestConfig& dnn,
initTestLayer( initTestLayer(
configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i])); configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i]));
} }
dnnLayer_ = testLayers_[DNN];
refLayer_ = testLayers_[REF]; refLayer_ = testLayers_[REF];
dnnLayer_ = std::dynamic_pointer_cast<MKLDNNLayer>(testLayers_[DNN]);
CHECK(dnnLayer_);
// for comparison with Paddle reference results,
// need manually add cpu device output for test
dnnLayer_->addOutputArgument(CPU_DEVICE);
EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size()); EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size());
EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size());
...@@ -109,20 +113,22 @@ void MKLDNNTester::randomBotDatas() { ...@@ -109,20 +113,22 @@ void MKLDNNTester::randomBotDatas() {
void MKLDNNTester::randomTopDiffs() { void MKLDNNTester::randomTopDiffs() {
refLayer_->getOutputGrad()->randomizeUniform(); refLayer_->getOutputGrad()->randomizeUniform();
dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad())); dnnLayer_->getOutput(CPU_DEVICE)
VLOG(lvl_) << "Random dom Backward Input, TopDiff: "; .grad->copyFrom(*(refLayer_->getOutputGrad()));
VLOG(lvl_) << "Random Backward Input, TopDiff: ";
printMatrix(refLayer_->getOutputGrad()); printMatrix(refLayer_->getOutputGrad());
} }
void MKLDNNTester::checkForward() { void MKLDNNTester::checkForward() {
printTopDatas();
double delta = compareMatrix(testLayers_[DNN]->getOutputValue(),
testLayers_[REF]->getOutputValue());
VLOG(MKLDNN_ALL) << "Check Forward"; VLOG(MKLDNN_ALL) << "Check Forward";
printTopDatas();
double delta = compareMatrix(dnnLayer_->getOutput(-1).value,
refLayer_->getOutputValue());
EXPECT_LE(fabs(delta), eps_); EXPECT_LE(fabs(delta), eps_);
} }
void MKLDNNTester::checkBackwardData() { void MKLDNNTester::checkBackwardData() {
VLOG(MKLDNN_ALL) << "Check Backward Data";
// TODO(TJ): uncomment me when batch norm ready // TODO(TJ): uncomment me when batch norm ready
// const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; // const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm";
for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) {
...@@ -144,14 +150,12 @@ void MKLDNNTester::checkBackwardData() { ...@@ -144,14 +150,12 @@ void MKLDNNTester::checkBackwardData() {
} }
void MKLDNNTester::checkBackwardWgts() { void MKLDNNTester::checkBackwardWgts() {
VLOG(MKLDNN_ALL) << "Check Backward Weight";
CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size());
vector<VectorPtr> dnnWgts; // used to temply save mkldnn weights vector<VectorPtr> dnnWgts; // used to temply save mkldnn weights
saveWgt(parameters_[DNN], dnnWgts); saveWgt(parameters_[DNN], dnnWgts);
const MKLDNNLayerPtr dnnlayer = dnnLayer_->convertWeightsToPaddle();
std::dynamic_pointer_cast<MKLDNNLayer>(dnnLayer_);
CHECK(dnnlayer);
dnnlayer->convertWeightsToPaddle();
for (size_t i = 0; i < parameters_[DNN].size(); ++i) { for (size_t i = 0; i < parameters_[DNN].size(); ++i) {
const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE);
const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE);
...@@ -189,38 +193,38 @@ void MKLDNNTester::restoreWgt(const vector<VectorPtr>& from, ...@@ -189,38 +193,38 @@ void MKLDNNTester::restoreWgt(const vector<VectorPtr>& from,
} }
// clear parameters grad // clear parameters grad
void MKLDNNTester::clearWgtDiffs() { void MKLDNNTester::clearWgtDiffs(size_t id) {
CHECK_LE(id, parameters_.size());
for (size_t n = 0; n < parameters_.size(); ++n) { for (size_t n = 0; n < parameters_.size(); ++n) {
for (size_t i = 0; i < parameters_[n].size(); ++i) { if (id == n || id == parameters_.size()) {
const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT); for (size_t i = 0; i < parameters_[n].size(); ++i) {
if (grad) { const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT);
grad->zeroMem(); if (grad) {
grad->zeroMem();
}
} }
} }
} }
} }
void MKLDNNTester::clearBotDiffs() { void MKLDNNTester::clearBotDiffs(size_t id) {
// dnn and ref CHECK_LE(id, dataLayers_.size());
for (size_t n = 0; n < dataLayers_.size(); ++n) { for (size_t n = 0; n < dataLayers_.size(); ++n) {
// all inputs layers if (id == n || id == dataLayers_.size()) {
for (size_t i = 0; i < dataLayers_[n].size(); ++i) { // clear inputs layers of this specific layer
dataLayers_[n][i]->getOutputGrad()->zeroMem(); for (size_t i = 0; i < dataLayers_[n].size(); ++i) {
dataLayers_[n][i]->getOutputGrad()->zeroMem();
}
} }
} }
} }
void MKLDNNTester::clearBotDiffs(int n) { void MKLDNNTester::clearTopDatas(size_t id) {
CHECK_LT(n, NUM); CHECK_LE(id, testLayers_.size());
// all inputs layers
for (size_t i = 0; i < dataLayers_[n].size(); ++i) {
dataLayers_[n][i]->getOutputGrad()->zeroMem();
}
}
void MKLDNNTester::clearTopDatas() {
for (size_t i = 0; i < testLayers_.size(); ++i) { for (size_t i = 0; i < testLayers_.size(); ++i) {
testLayers_[i]->getOutputValue()->zeroMem(); if (id == i || id == testLayers_.size()) {
testLayers_[i]->getOutputValue()->zeroMem();
}
} }
} }
...@@ -300,16 +304,24 @@ void MKLDNNTester::runOnce() { ...@@ -300,16 +304,24 @@ void MKLDNNTester::runOnce() {
checkForward(); checkForward();
// test backward // test backward
// simple updater
UpdateCallback updateCallback = [](Parameter* para) {
auto& grad = para->getBuf(PARAMETER_GRADIENT);
auto& value = para->getBuf(PARAMETER_VALUE);
real lr = 1e-3;
value->add(*grad, lr);
};
randomTopDiffs(); randomTopDiffs();
dnnLayer_->backward(nullptr); dnnLayer_->backward(updateCallback);
refLayer_->backward(nullptr); refLayer_->backward(updateCallback);
checkBackwardData(); checkBackwardData();
checkBackwardWgts(); checkBackwardWgts();
// clear buffers // clear buffers
// ref code will addto the diff, dnn code will writeto it // ref code will addto the diff, dnn code will writeto it
// and clearTopDatas() and clearWgtDiffs() should be coverd by test layers // and clearTopDatas(REF) should be coverd by ref layers
clearBotDiffs(REF); clearBotDiffs(REF);
clearWgtDiffs(REF);
} }
void MKLDNNTester::run(const TestConfig& dnn, void MKLDNNTester::run(const TestConfig& dnn,
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include <vector> #include <vector>
#include "LayerGradUtil.h" #include "LayerGradUtil.h"
#include "paddle/gserver/layers/MKLDNNBase.h" #include "paddle/gserver/layers/MKLDNNBase.h"
#include "paddle/gserver/layers/MKLDNNLayer.h"
namespace paddle { namespace paddle {
...@@ -40,7 +41,8 @@ protected: ...@@ -40,7 +41,8 @@ protected:
vector<LayerMap> layerMaps_; vector<LayerMap> layerMaps_;
vector<vector<ParameterPtr>> parameters_; vector<vector<ParameterPtr>> parameters_;
vector<LayerPtr> testLayers_; vector<LayerPtr> testLayers_;
LayerPtr dnnLayer_, refLayer_; LayerPtr refLayer_;
MKLDNNLayerPtr dnnLayer_;
/// run some iterations, all the result should pass /// run some iterations, all the result should pass
size_t iter_; size_t iter_;
...@@ -88,10 +90,10 @@ private: ...@@ -88,10 +90,10 @@ private:
void checkBackwardData(); void checkBackwardData();
void checkBackwardWgts(); void checkBackwardWgts();
void clearWgtDiffs(); // clear specific layer, clear all when id equals NUM
void clearBotDiffs(); void clearWgtDiffs(size_t id = NUM);
void clearBotDiffs(int n); // clear specific layer void clearBotDiffs(size_t id = NUM);
void clearTopDatas(); void clearTopDatas(size_t id = NUM);
void printTopDatas(); void printTopDatas();
void printMatrix(const MatrixPtr& m); void printMatrix(const MatrixPtr& m);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册