未验证 提交 55bee85e 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #5779 from tensor-tang/refine

refine MKLDNNLayer
......@@ -38,12 +38,13 @@ bool MKLDNNAddtoLayer::init(const LayerMap& layerMap,
}
void MKLDNNAddtoLayer::reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
CHECK_EQ(layerSize_, getSize()) << "this layer size can not be changed";
reshapeInput(bs, ih, iw);
ic = inputLayers_[0]->getSize() / ih / iw;
CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize());
CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw);
CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(),
(size_t)bs * ic * ih * iw);
for (size_t i = 0; i < inputLayers_.size(); i++) {
CHECK_EQ(int64_t(bs), inputLayers_[i]->getOutput().getBatchSize());
CHECK_EQ(layerSize_, inputLayers_[i]->getSize());
......@@ -57,47 +58,43 @@ void MKLDNNAddtoLayer::reshape(
}
/**
 * Rebuild the forward pass of the addto layer in three steps:
 * reset the buffers, reset the sum primitive descriptors (one for the
 * inputs, one for the bias), then reset the pipeline.
 *
 * NOTE(review): several statements below appear twice, once operating on
 * inVals_/bias and once on inputs/biasVal_. This looks like a rendered diff
 * with the +/- markers stripped -- confirm which variant is current.
 */
void MKLDNNAddtoLayer::resetFwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
// step 1: buffers for all inputs, the bias and the output
resetFwdBuffers(inVals_, bias, out);
in = inVals_[0];
resetFwdBuffers(inputs, biasVal_, out);
// step 2: primitive descriptors, local to this call
std::shared_ptr<sum::primitive_desc> fwdPD;
std::shared_ptr<sum::primitive_desc> biasPD;
resetFwdPD(fwdPD, biasPD, inVals_, bias, out);
resetFwdPD(fwdPD, biasPD, inputs, biasVal_, out);
// step 3: pipeline built from the descriptors above
resetFwdPipeline(pipeline, fwdPD, biasPD, inVals_, bias, out);
resetFwdPipeline(pipeline, fwdPD, biasPD, inputs, biasVal_, out);
}
/**
 * Rebuild the backward pass of the addto layer.
 *
 * Every non-null input grad is pointed at the output grad, and the matching
 * input layer's output grad is set to the same data. When a bias grad
 * exists, a sum primitive over bs_ sources (each scaled by 1.0) is created
 * and appended to the pipeline.
 *
 * NOTE(review): several statements below appear twice, once using
 * inGrads_/bias and once using inputs/biasGrad_. This looks like a rendered
 * diff with the +/- markers stripped -- confirm which variant is current.
 */
void MKLDNNAddtoLayer::resetBwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
resetBwdBuffers(inGrads_, bias, out);
in = inGrads_[0];
resetBwdBuffers(inputs, biasGrad_, out);
// backward only needs to share the output grad with each input grad
for (size_t i = 0; i < inGrads_.size(); i++) {
if (inGrads_[i] != nullptr) {
inGrads_[i] = out;
inputLayers_[i]->getOutputGrad()->setData(inGrads_[i]->getData());
for (size_t i = 0; i < inputs.size(); i++) {
if (inputs[i] != nullptr) {
inputs[i] = out;
inputLayers_[i]->getOutputGrad()->setData(inputs[i]->getData());
}
}
// backward bias: drop any previous primitive before (maybe) rebuilding it
bwdBias_ = nullptr;
if (bias) {
if (biasGrad_) {
// one source per batch entry, every source weighted by 1.0
std::vector<float> scales(bs_, 1.0);
std::vector<memory::primitive_desc> srcPDs(bs_, bias->getPrimitiveDesc());
auto biasPD = sum::primitive_desc(bias->getMemoryDesc(), scales, srcPDs);
std::vector<memory::primitive_desc> srcPDs(bs_,
biasGrad_->getPrimitiveDesc());
auto biasPD =
sum::primitive_desc(biasGrad_->getMemoryDesc(), scales, srcPDs);
// the sum sources are taken from grads_ (populated outside this function)
std::vector<primitive::at> srcs;
for (size_t i = 0; i < grads_.size(); ++i) {
srcs.push_back(*(grads_[i]));
}
bwdBias_.reset(new sum(biasPD, srcs, *bias));
bwdBias_.reset(new sum(biasPD, srcs, *biasGrad_));
pipeline.push_back(*bwdBias_);
}
}
......@@ -208,7 +205,7 @@ void MKLDNNAddtoLayer::resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
inputs.resize(inputLayers_.size());
for (size_t i = 0; i < inputs.size(); i++) {
resetInGrad(inputs[i], inVal_->getPrimitiveDesc(), i);
resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i);
CHECK_PRIMITIVE_DESC_EQ(inputs[i], out->getPrimitiveDesc());
}
......
......@@ -26,9 +26,6 @@ namespace paddle {
*/
class MKLDNNAddtoLayer : public MKLDNNLayer {
protected:
std::vector<MKLDNNMatrixPtr> inVals_;
std::vector<MKLDNNMatrixPtr> inGrads_;
// layer size == ic * ih * iw == oc * oh * ow, and cannot be changed
size_t layerSize_;
......@@ -50,52 +47,19 @@ public:
const ParameterMap& parameterMap) override;
void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void updateWeights(const UpdateCallback& callback) override;
/**
 * Log, at verbosity level MKLDNN_FMTS, the memory format of every input
 * value, then of the internal output value and of the external output
 * value when they are set.
 * Note: inVals_[i] is dereferenced without a null check, while the two
 * output values are checked before use.
 */
void printValueFormat() override {
for (size_t i = 0; i < inVals_.size(); ++i) {
VLOG(MKLDNN_FMTS) << i << " input: " << inVals_[i]->getFormat() << " >>>";
}
if (outVal_) {
VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
}
if (extOutVal_) {
VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
}
}
/**
 * Log, at verbosity level MKLDNN_FMTS, the memory format of the external
 * output grad and the internal output grad when they are set, then of
 * every input grad.
 * Note: inGrads_[i] is dereferenced without a null check, while the two
 * output grads are checked before use.
 */
void printGradFormat() override {
if (extOutGrad_) {
VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
}
if (outGrad_) {
VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< ";
}
for (size_t i = 0; i < inGrads_.size(); ++i) {
VLOG(MKLDNN_FMTS) << i << " input: " << inGrads_[i]->getFormat() << "<<<";
}
}
protected:
/**
* Forward functions: reset buffers(inputs, output, bias),
* reset primitive descriptor,
* reset pipeline.
*/
void resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
......@@ -110,17 +74,10 @@ protected:
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
* Backward functions: reset buffers(inputs, output, bias)
*/
void resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
* prepare for bias
*/
void prepareBias(MKLDNNMatrixPtr& bias,
const MatrixPtr& biasMat,
const MKLDNNMatrixPtr& out,
......
......@@ -116,21 +116,20 @@ void MKLDNNBatchNormLayer::calMovingMeanAndVar() {
}
/**
 * Recompute the layer sizes from the current input.
 *
 * bs, ih and iw are refreshed through reshapeInput(); the output height and
 * width are set equal to the input height and width. The channel count
 * derived from the first input's element count must equal ic, otherwise the
 * CHECK fails. Finally the output is reshaped and resized to
 * bs x (oc * oh * ow).
 *
 * NOTE(review): the parameter list and the CHECK_EQ appear twice (oc by
 * value vs. by reference; inputElemenCnt_ vs. the first input's element
 * count). This looks like a rendered diff with the +/- markers stripped --
 * confirm which variant is current.
 */
void MKLDNNBatchNormLayer::reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
reshapeInput(bs, ih, iw);
// batch norm keeps the spatial size unchanged
oh = ih;
ow = iw;
// ic_ and oc cannot be changed
CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic)
CHECK_EQ((size_t)ic,
inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw)
<< "Input channel can not be changed";
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
}
void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
// In training phase, it will always calculate mean and var,
// so useGlobalStats must be false.
......@@ -140,25 +139,23 @@ void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,
useGlobalStats_ = false;
}
resetFwdBuffers(in, wgt, out);
resetFwdBuffers(inputs[0], wgtVal_, out);
resetFwdPD(fwdPD_, in, wgt, out);
resetFwdPD(fwdPD_, inputs[0], wgtVal_, out);
resetFwdPipeline(pipeline, fwdPD_, in, wgt, out);
resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, out);
}
/**
 * Rebuild the backward pass of the batch norm layer in three steps:
 * reset the buffers, reset the backward primitive descriptor (local to
 * this call), then reset the pipeline.
 *
 * NOTE(review): each step appears twice, once using in/wgt and once using
 * inputs[0]/wgtGrad_. This looks like a rendered diff with the +/- markers
 * stripped -- confirm which variant is current.
 */
void MKLDNNBatchNormLayer::resetBwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
std::shared_ptr<bn_bwd::primitive_desc> pd;
// step 1: buffers
resetBwdBuffers(in, wgt, out);
resetBwdBuffers(inputs[0], wgtGrad_, out);
// step 2: primitive descriptor
resetBwdPD(pd, in, wgt, out);
resetBwdPD(pd, inputs[0], wgtGrad_, out);
// step 3: pipeline
resetBwdPipeline(pipeline, pd, in, wgt, out);
resetBwdPipeline(pipeline, pd, inputs[0], wgtGrad_, out);
}
void MKLDNNBatchNormLayer::forward(PassType passType) {
......@@ -260,9 +257,9 @@ void MKLDNNBatchNormLayer::resetFwdPipeline(
void MKLDNNBatchNormLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& out) {
CHECK(inVal_ && outVal_);
CHECK(inVals_[0] && outVal_);
resetOutGrad(out, outVal_->getPrimitiveDesc());
resetInGrad(in, inVal_->getPrimitiveDesc());
resetInGrad(in, inVals_[0]->getPrimitiveDesc());
if (gradScaleShift_) {
CHECK(wgtVal_);
resetWithMatrix(wgt, gradScaleShift_, wgtVal_->getPrimitiveDesc());
......@@ -297,11 +294,12 @@ void MKLDNNBatchNormLayer::resetBwdPipeline(
if (pd == nullptr) {
return;
}
CHECK(inVal_);
CHECK(inVals_[0]);
bwdData_.reset(
wgt && wgtVal_
? new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *wgtVal_, *in, *wgt)
: new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *in));
? new bn_bwd(
*pd, *inVals_[0], *mean_, *var_, *out, *wgtVal_, *in, *wgt)
: new bn_bwd(*pd, *inVals_[0], *mean_, *var_, *out, *in));
pipeline.push_back(*bwdData_);
}
......
......@@ -73,18 +73,14 @@ public:
void forward(PassType passType) override;
void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void updateWeights(const UpdateCallback& callback) override;
......@@ -98,11 +94,7 @@ protected:
* moving = moving * AvgFraction + local * (1 - AvgFraction)
*/
void calMovingMeanAndVar();
/**
* Forward functions: reset buffers(input, weight, output),
* reset primitive descriptor,
* reset pipeline.
*/
void resetFwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& out);
......@@ -115,12 +107,6 @@ protected:
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& out);
/**
* Backward functions: reset buffers(input, weight, output),
* reset primitive descriptor,
* reset pipeline.
*/
void resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& out);
......
......@@ -32,17 +32,16 @@ bool MKLDNNConcatLayer::init(const LayerMap& layerMap,
}
void MKLDNNConcatLayer::reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
reshapeInput(bs, ih, iw);
ic = inputLayers_[0]->getSize() / ih / iw;
CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize());
CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw);
CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(),
(size_t)bs * ic * ih * iw);
CHECK_GT(inputLayers_.size(), 1UL);
channels_.resize(inputLayers_.size());
channels_[0] = ic;
// need change the output channel, so use oc_ instead
// TODO(TJ): change API, use &oc
oc_ = ic;
oc = ic;
for (size_t i = 1; i < inputLayers_.size(); i++) {
int batchsize, height, witdh;
reshapeInput(batchsize, height, witdh, i);
......@@ -52,37 +51,31 @@ void MKLDNNConcatLayer::reshape(
channels_[i] = inputLayers_[i]->getSize() / height / witdh;
CHECK_EQ((size_t)channels_[i] * height * witdh, inputLayers_[i]->getSize());
oc_ += channels_[i];
oc += channels_[i];
}
oh = ih;
ow = iw;
reshapeOutput(oh, ow);
resizeOutput(bs, oc_ * oh * ow);
resizeOutput(bs, oc * oh * ow);
}
/**
 * Rebuild the forward pass of the concat layer in three steps:
 * reset the buffers, reset the concat primitive descriptor (local to this
 * call), then reset the pipeline.
 *
 * NOTE(review): each step appears twice, once using inVals_ and once using
 * inputs. This looks like a rendered diff with the +/- markers stripped --
 * confirm which variant is current.
 */
void MKLDNNConcatLayer::resetFwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
// step 1: buffers for all inputs and the output
resetFwdBuffers(inVals_, out);
in = inVals_[0];
resetFwdBuffers(inputs, out);
// step 2: primitive descriptor
std::shared_ptr<concat::primitive_desc> fwdPD;
resetFwdPD(fwdPD, inVals_, out);
resetFwdPD(fwdPD, inputs, out);
// step 3: pipeline
resetFwdPipeline(pipeline, fwdPD, inVals_, out);
resetFwdPipeline(pipeline, fwdPD, inputs, out);
}
/**
 * Rebuild the backward pass of the concat layer: reset the grad buffers,
 * then reset the backward primitives (bwds_) and the pipeline.
 *
 * NOTE(review): each step appears twice, once using inGrads_ and once using
 * inputs. This looks like a rendered diff with the +/- markers stripped --
 * confirm which variant is current.
 */
void MKLDNNConcatLayer::resetBwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
resetBwdBuffers(inGrads_, out);
in = inGrads_[0];
resetBwdBuffers(inputs, out);
resetBwdPipeline(pipeline, bwds_, inGrads_, out);
resetBwdPipeline(pipeline, bwds_, inputs, out);
}
void MKLDNNConcatLayer::resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
......@@ -90,10 +83,7 @@ void MKLDNNConcatLayer::resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
inputs.resize(inputLayers_.size());
bool has8c = false, has16c = false, hasnc = false;
for (size_t i = 0; i < inputs.size(); i++) {
// resetInValue will use ic_ so temporary change as current input's channel
// TODO(TJ): change ic_ as vector then can remove channels_
ic_ = channels_[i];
resetInValue(inputs[i], nullptr, i);
resetInValue(inputs[i], nullptr, i, channels_[i]);
CHECK(inputs[i]);
auto dm = inputs[i]->getDims();
// inputs format can be different, but ndims must equal
......@@ -114,8 +104,6 @@ void MKLDNNConcatLayer::resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
has16c = true;
}
}
// change back, ic_ always save the input 0 size
ic_ = channels_[0];
format outFmt;
if (has16c && oc_ % 16 == 0) {
......@@ -168,14 +156,9 @@ void MKLDNNConcatLayer::resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
inputs.resize(inputLayers_.size());
for (size_t i = 0; i < inputs.size(); i++) {
CHECK(inVals_[i]);
// resetInGrad will use inVal_
// TODO(TJ): move inVals_ to MKLDNNLayer and remove inVal_
inVal_ = inVals_[i];
resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i);
CHECK_PRIMITIVE_DESC_EQ(inputs[i], inVals_[i]->getPrimitiveDesc());
}
// change back, inVal_ always save the input 0
inVal_ = inVals_[0];
}
void MKLDNNConcatLayer::resetBwdPipeline(
......
......@@ -26,8 +26,6 @@ namespace paddle {
*/
class MKLDNNConcatLayer : public MKLDNNLayer {
protected:
std::vector<MKLDNNMatrixPtr> inVals_;
std::vector<MKLDNNMatrixPtr> inGrads_;
std::vector<std::shared_ptr<mkldnn::primitive>> bwds_;
// input channel numbers
std::vector<int> channels_;
......@@ -47,18 +45,14 @@ public:
const ParameterMap& parameterMap) override;
void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void printSizeInfo() override {
......@@ -72,38 +66,16 @@ public:
<< ", " << ow_;
}
void printValueFormat() override {
for (size_t i = 0; i < inVals_.size(); ++i) {
VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
<< ": " << inVals_[i]->getFormat() << " >>>";
}
if (outVal_) {
VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
}
if (extOutVal_) {
VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
}
}
void printGradFormat() override {
if (extOutGrad_) {
VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
}
if (outGrad_) {
VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< ";
}
for (size_t i = 0; i < inGrads_.size(); ++i) {
VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
<< ": " << inGrads_[i]->getFormat() << "<<<";
size_t keepCondition() {
// reset when the total element size of all inputs changed
size_t totalSize = inputLayers_[0]->getOutputValue()->getElementCnt();
for (size_t i = 1; i < inputLayers_.size(); ++i) {
totalSize += inputLayers_[i]->getOutputValue()->getElementCnt();
}
return totalSize;
}
protected:
/**
* Forward functions: reset buffers(inputs, output, bias),
* reset primitive descriptor,
* reset pipeline.
*/
void resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out);
void resetFwdPD(std::shared_ptr<mkldnn::concat::primitive_desc>& pd,
......@@ -113,11 +85,6 @@ protected:
std::shared_ptr<mkldnn::concat::primitive_desc>& pd,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out);
/**
* Backward functions: reset buffers(inputs, output, bias)
* reset primitives and pipeline
*/
void resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out);
void resetBwdPipeline(std::vector<mkldnn::primitive>& pipeline,
......
......@@ -90,7 +90,7 @@ void MKLDNNConvLayer::convertWeightsToPaddle() {
}
void MKLDNNConvLayer::reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
reshapeInput(bs, ih, iw);
// cal output sizes
......@@ -105,21 +105,17 @@ void MKLDNNConvLayer::reshape(
}
/**
 * Rebuild the forward pass of the convolution layer. Unlike the sibling
 * layers shown here, the primitive descriptor (fwdPD_) is reset first; the
 * buffers and then the pipeline are reset from it.
 *
 * NOTE(review): the buffer and pipeline steps appear twice, once using
 * in/wgt/bias and once using inputs[0]/wgtVal_/biasVal_. This looks like a
 * rendered diff with the +/- markers stripped -- confirm which variant is
 * current.
 */
void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
// step 1: primitive descriptor, kept in the member fwdPD_
resetFwdPD(fwdPD_);
// step 2: buffers
resetFwdBuffers(fwdPD_, in, wgt, bias, out);
resetFwdBuffers(fwdPD_, inputs[0], wgtVal_, biasVal_, out);
// step 3: pipeline
resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out);
}
void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
std::shared_ptr<conv_bwdWgt::primitive_desc> bwdWgtPD;
std::shared_ptr<conv_bwdData::primitive_desc> bwdDataPD;
......@@ -128,9 +124,10 @@ void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdDataPD(bwdDataPD);
resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out);
resetBwdBuffers(bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out);
resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out);
resetBwdPipeline(
pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out);
}
void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) {
......@@ -236,14 +233,14 @@ void MKLDNNConvLayer::resetBwdWgtPD(
loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
// create backward weight using input, output and weight value memory desc
CHECK(inVal_) << "Should have internal input value";
CHECK(inVals_[0]) << "Should have internal input value";
CHECK(outVal_) << "Should have internal output value";
CHECK(wgtVal_) << "Should have weight value";
algorithm algo = algorithm::convolution_direct;
padding_kind padKind = padding_kind::zero;
auto bwdWgtDesc = biasVal_ != nullptr
? conv_bwdWgt::desc(algo,
inVal_->getMemoryDesc(),
inVals_[0]->getMemoryDesc(),
wgtVal_->getMemoryDesc(),
biasVal_->getMemoryDesc(),
outVal_->getMemoryDesc(),
......@@ -252,7 +249,7 @@ void MKLDNNConvLayer::resetBwdWgtPD(
padR,
padKind)
: conv_bwdWgt::desc(algo,
inVal_->getMemoryDesc(),
inVals_[0]->getMemoryDesc(),
wgtVal_->getMemoryDesc(),
outVal_->getMemoryDesc(),
strides,
......@@ -260,7 +257,7 @@ void MKLDNNConvLayer::resetBwdWgtPD(
padR,
padKind);
pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_));
CHECK_PRIMITIVE_DESC_EQ(inVal_, pd->src_primitive_desc());
CHECK_PRIMITIVE_DESC_EQ(inVals_[0], pd->src_primitive_desc());
CHECK_PRIMITIVE_DESC_EQ(
outVal_,
pd->diff_dst_primitive_desc(),
......@@ -280,12 +277,12 @@ void MKLDNNConvLayer::resetBwdDataPD(
memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
CHECK(inVal_) << "Should have internal input value";
CHECK(inVals_[0]) << "Should have internal input value";
CHECK(outVal_) << "Should have internal output value";
// create backward data using input and output value memory desc
// but using weight memory desc with any format
auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct,
inVal_->getMemoryDesc(),
inVals_[0]->getMemoryDesc(),
MKLDNNMatrix::createMemoryDesc(wgtDims),
outVal_->getMemoryDesc(),
strides,
......@@ -294,7 +291,7 @@ void MKLDNNConvLayer::resetBwdDataPD(
padding_kind::zero);
pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_));
CHECK_PRIMITIVE_DESC_EQ(
inVal_,
inVals_[0],
pd->diff_src_primitive_desc(),
"primitive desc of in value and grad should be equal");
CHECK_PRIMITIVE_DESC_EQ(
......@@ -346,12 +343,12 @@ void MKLDNNConvLayer::resetBwdPipeline(
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
CHECK(inVal_);
CHECK(inVals_[0]);
// add bwdWgt handle
if (bias) {
bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias));
bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt, *bias));
} else {
bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt));
bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt));
}
pipeline.push_back(*bwdWgt_);
......
......@@ -69,18 +69,14 @@ public:
const ParameterMap& parameterMap) override;
void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void updateWeights(const UpdateCallback& callback) override;
......@@ -107,48 +103,26 @@ protected:
mkldnn::memory::dims& padL,
mkldnn::memory::dims& padR);
/**
* reset the forward primitive descriptor.
*/
void resetFwdPD(std::shared_ptr<conv_fwd::primitive_desc>& pd);
/**
* reset the MKLDNNMatrix buffers used in forward.
*/
void resetFwdBuffers(std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
* reset the forward pipeline.
*/
void resetFwdPipeline(std::vector<mkldnn::primitive>& pipeline,
std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
* reset the backward weight primitive descriptor.
*/
void resetBwdWgtPD(std::shared_ptr<conv_bwdWgt::primitive_desc>& pd);
/**
* reset the backward data primitive descriptor.
*/
void resetBwdDataPD(std::shared_ptr<conv_bwdData::primitive_desc>& pd);
/**
* reset the MKLDNNMatrix buffers used in backward.
*/
void resetBwdBuffers(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
* reset the backward pipeline.
*/
void resetBwdPipeline(std::vector<mkldnn::primitive>& pipeline,
std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
......
......@@ -74,7 +74,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
}
void MKLDNNFcLayer::reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
reshapeInput(bs, ih, iw);
CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize());
......@@ -87,32 +87,29 @@ void MKLDNNFcLayer::reshape(
}
/**
 * Rebuild the forward pass of the fully connected layer in three steps:
 * reset the buffers, reset the primitive descriptor (fwdPD_), then reset
 * the pipeline.
 *
 * NOTE(review): each step appears twice, once using in/wgt/bias and once
 * using inputs[0]/wgtVal_/biasVal_. This looks like a rendered diff with
 * the +/- markers stripped -- confirm which variant is current.
 */
void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
// step 1: buffers
resetFwdBuffers(in, wgt, bias, out);
resetFwdBuffers(inputs[0], wgtVal_, biasVal_, out);
// step 2: primitive descriptor, kept in the member fwdPD_
resetFwdPD(fwdPD_, in, wgt, bias, out);
resetFwdPD(fwdPD_, inputs[0], wgtVal_, biasVal_, out);
// step 3: pipeline
resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out);
}
/**
 * Rebuild the backward pass of the fully connected layer in four steps:
 * reset the buffers, reset the backward-weight primitive descriptor, reset
 * the backward-data primitive descriptor, then reset the pipeline.
 * Both descriptors are local to this call.
 *
 * NOTE(review): each step appears twice, once using in/wgt/bias and once
 * using inputs[0]/wgtGrad_/biasGrad_. This looks like a rendered diff with
 * the +/- markers stripped -- confirm which variant is current.
 */
void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
std::shared_ptr<fc_bwdWgt::primitive_desc> bwdWgtPD;
std::shared_ptr<fc_bwdData::primitive_desc> bwdDataPD;
// step 1: buffers
resetBwdBuffers(in, wgt, bias, out);
resetBwdBuffers(inputs[0], wgtGrad_, biasGrad_, out);
// step 2: backward-weight primitive descriptor
resetBwdWgtPD(bwdWgtPD, wgt, bias, out);
resetBwdWgtPD(bwdWgtPD, wgtGrad_, biasGrad_, out);
// step 3: backward-data primitive descriptor
resetBwdDataPD(bwdDataPD, in, out);
resetBwdDataPD(bwdDataPD, inputs[0], out);
// step 4: pipeline
resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out);
resetBwdPipeline(
pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out);
}
void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) {
......@@ -193,9 +190,9 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
CHECK(inVal_ && outVal_);
CHECK(inVals_[0] && outVal_);
resetOutGrad(out, outVal_->getPrimitiveDesc());
resetInGrad(in, inVal_->getPrimitiveDesc());
resetInGrad(in, inVals_[0]->getPrimitiveDesc());
CHECK(wgtVal_);
resetWithMatrix(wgt, weight_->getWGrad(), wgtVal_->getPrimitiveDesc());
......@@ -212,14 +209,15 @@ void MKLDNNFcLayer::resetBwdWgtPD(
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
CHECK(inVal_);
fc_bwdWgt::desc bwdWgtDesc = bias ? fc_bwdWgt::desc(inVal_->getMemoryDesc(),
wgt->getMemoryDesc(),
bias->getMemoryDesc(),
out->getMemoryDesc())
: fc_bwdWgt::desc(inVal_->getMemoryDesc(),
wgt->getMemoryDesc(),
out->getMemoryDesc());
CHECK(inVals_[0]);
fc_bwdWgt::desc bwdWgtDesc =
bias ? fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(),
wgt->getMemoryDesc(),
bias->getMemoryDesc(),
out->getMemoryDesc())
: fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(),
wgt->getMemoryDesc(),
out->getMemoryDesc());
pd.reset(new fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_));
}
......@@ -245,11 +243,11 @@ void MKLDNNFcLayer::resetBwdPipeline(
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
CHECK(inVal_);
CHECK(inVals_[0]);
if (bias) {
bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias));
bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt, *bias));
} else {
bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt));
bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt));
}
pipeline.push_back(*bwdWgt_);
......
......@@ -52,18 +52,14 @@ public:
const ParameterMap& parameterMap) override;
void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void updateWeights(const UpdateCallback& callback) override;
......@@ -73,11 +69,6 @@ public:
void convertWeightsToPaddle() override;
protected:
/**
* Forward functions: reset buffers(input, output, weight and bias),
* reset primitive descriptor,
* reset pipeline.
*/
void resetFwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
......@@ -93,13 +84,6 @@ protected:
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
* Backward functions: reset buffers(input, output, weight and bias),
* reset primitive descriptor for backward weight,
* reset primitive descriptor for backward data,
* reset pipeline.
*/
void resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
......
......@@ -48,31 +48,20 @@ void MKLDNNLayer::forward(PassType passType) {
REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
CHECK(!inputLayers_.empty());
copySeqInfoToOutputs();
size_t elemenCnt = inputLayers_[0]->getOutputValue()->getElementCnt();
if (inputElemenCnt_ != elemenCnt) {
if (condition_ != keepCondition()) {
VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
// reset when input total sizes changed, not only the batchsize
inputElemenCnt_ = elemenCnt;
pipelineFwd_.clear();
condition_ = keepCondition();
reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
// all cpu device output grad or value share output's
printSizeInfo();
// the output_.value and output_.grad are shared with CPU device
shareCPUDevice();
resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
// MKLDNNLayer output value should be MKLDNNMatrix
// so external output value is necessary.
// Then external input value is not necessary,
// since input may be mkldnn internal buffer.
CHECK(extOutVal_) << "external output value is necessary";
output_.value = std::dynamic_pointer_cast<Matrix>(extOutVal_);
CHECK(inVal_ && outVal_) << "internal memories are necessary";
if (cvtInVal_) {
pipelineFwd_.insert(pipelineFwd_.begin(), *cvtInVal_);
}
if (cvtOutVal_) {
pipelineFwd_.push_back(*cvtOutVal_);
}
pipelineFwd_.clear();
inVals_.resize(inputLayers_.size(), nullptr);
extInVals_.resize(inputLayers_.size(), nullptr);
cvtInVals_.resize(inputLayers_.size(), nullptr);
resetFwd(pipelineFwd_, inVals_, outVal_);
prepareValueConversions(pipelineFwd_);
convertWeightsFromPaddle();
printSizeInfo();
printValueFormat();
needResetBwd_ = true;
}
......@@ -80,8 +69,8 @@ void MKLDNNLayer::forward(PassType passType) {
if (inputLayers_[0]->getType() == "data" && inputLayers_.size() == 1) {
// Update input value data when input layer is "data" type,
// since the input value data address might be changed.
CHECK(extInVal_);
extInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
CHECK(extInVals_[0]);
extInVals_[0]->setData(getInputValue(0, CPU_DEVICE)->getData());
}
if (!outputOnlyMKLDNN_) {
......@@ -99,22 +88,13 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) {
if (needResetBwd_) {
VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward";
pipelineBwd_.clear();
inGrads_.resize(inputLayers_.size(), nullptr);
extInGrads_.resize(inputLayers_.size(), nullptr);
cvtInGrads_.resize(inputLayers_.size(), nullptr);
pipelineMergeGrad_.clear();
mergeGrad_ = nullptr;
resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
// external output grad is not necessary
// since output may be mkldnn internal buffer or merge them directly.
CHECK(outGrad_) << "internal output grad is necessary";
if (extOutGrad_) {
CHECK_EQ(extOutGrad_->getData(), output_.grad->getData())
<< "the external buffer should share the same data with output_.grad";
}
if (cvtOutGrad_) {
pipelineBwd_.insert(pipelineBwd_.begin(), *cvtOutGrad_);
}
if (cvtInGrad_) {
pipelineBwd_.push_back(*cvtInGrad_);
}
resetBwd(pipelineBwd_, inGrads_, outGrad_);
prepareGradConversions(pipelineBwd_);
printGradFormat();
needResetBwd_ = false;
}
......@@ -141,8 +121,8 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) {
void MKLDNNLayer::reshapeInput(int& batchsize,
int& height,
int& width,
size_t inputIdx) {
const Argument& input = inputLayers_[inputIdx]->getOutput();
size_t idx) {
const Argument& input = inputLayers_[idx]->getOutput();
batchsize = input.getBatchSize();
int h = input.getFrameHeight();
int w = input.getFrameWidth();
......@@ -176,27 +156,30 @@ void MKLDNNLayer::resetWithMatrix(MKLDNNMatrixPtr& dnn,
/**
 * Reset the internal input value `in` for one input layer, together with
 * the matching external input value and the reorder between them.
 *
 * @param in     receives the internal input matrix.
 * @param intPD  requested internal primitive desc; when null, or equal to
 *               the external one, `in` simply aliases the external value
 *               and no reorder is created.
 * @param idx    index of the input layer (named inputIdx in one variant).
 * @param inputChannel  channel count to use; 0 falls back to ic_.
 *
 * NOTE(review): most statements below appear twice, once on the scalar
 * members (cvtInVal_, extInVal_) and once on the per-input vectors
 * (cvtInVals_[idx], extInVals_[idx]). This looks like a rendered diff with
 * the +/- markers stripped -- confirm which variant is current.
 */
void MKLDNNLayer::resetInValue(
MKLDNNMatrixPtr& in,
const std::shared_ptr<memory::primitive_desc>& intPD,
size_t inputIdx) {
cvtInVal_ = nullptr;
extInVal_ = nullptr;
size_t idx,
int inputChannel) {
cvtInVals_[idx] = nullptr;
extInVals_[idx] = nullptr;
in = nullptr;
CHECK_GT(bs_ * ic_ * ih_ * iw_, 0);
// a channel count of 0 means "use the layer's own ic_"
inputChannel = inputChannel == 0 ? ic_ : inputChannel;
CHECK_GT(bs_ * inputChannel * ih_ * iw_, 0);
// external (paddle side) layout is nchw
auto extPD = MKLDNNMatrix::createPrimitiveDesc(
{bs_, ic_, ih_, iw_}, format::nchw, engine_);
const MatrixPtr& inMat = inputLayers_[inputIdx]->getOutputValue();
extInVal_ = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
CHECK_EQ(inputIsOnlyMKLDNN(), extInVal_ != nullptr);
if (extInVal_ == nullptr || extInVal_->getFormat() == format::nc) {
extInVal_ = MKLDNNMatrix::create(extPD, inMat);
{bs_, inputChannel, ih_, iw_}, format::nchw, engine_);
const MatrixPtr& inMat = inputLayers_[idx]->getOutputValue();
// the cast succeeds only when the input matrix is already an MKLDNNMatrix;
// that must agree with inputIsOnlyMKLDNN()
extInVals_[idx] = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
CHECK_EQ(inputIsOnlyMKLDNN(), extInVals_[idx] != nullptr);
// otherwise (or when the format is nc) wrap the input matrix with extPD
if (extInVals_[idx] == nullptr ||
extInVals_[idx]->getFormat() == format::nc) {
extInVals_[idx] = MKLDNNMatrix::create(extPD, inMat);
}
in = extInVal_;
in = extInVals_[idx];
// no internal desc requested, or it already matches: nothing to convert
if (nullptr == intPD || in->getPrimitiveDesc() == *intPD) {
return;
}
// need create reorder
in = MKLDNNMatrix::create(*intPD);
cvtInVal_ = MKLDNNMatrix::createReorder(extInVal_, in);
CHECK(cvtInVal_) << "should not be emptry";
cvtInVals_[idx] = MKLDNNMatrix::createReorder(extInVals_[idx], in);
CHECK(cvtInVals_[idx]) << "should not be emptry";
}
void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out,
......@@ -218,11 +201,11 @@ void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out,
void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
memory::primitive_desc intPD,
size_t inputIdx) {
cvtInGrad_ = nullptr;
extInGrad_ = nullptr;
size_t idx) {
cvtInGrads_[idx] = nullptr;
extInGrads_[idx] = nullptr;
in = nullptr;
LayerPtr& input = inputLayers_[inputIdx];
LayerPtr& input = inputLayers_[idx];
if (input->getOutputGrad() == nullptr) {
// no need input grad
return;
......@@ -237,23 +220,25 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
in = MKLDNNMatrix::create(intPD, inMat);
Argument& arg = input->getOutput(this->getName());
arg.grad = std::dynamic_pointer_cast<Matrix>(in);
CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD);
CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD);
if (inputIsOnlyMKLDNN()) {
return;
}
extInGrad_ = in;
if (isPaddleFormat(extInGrad_->getFormat())) {
extInGrads_[idx] = in;
if (isPaddleFormat(extInGrads_[idx]->getFormat())) {
return;
}
// need create reorder
CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat()))
CHECK(extInVals_[idx] != nullptr &&
isPaddleFormat(extInVals_[idx]->getFormat()))
<< "should have external input value and the format must be nchw(nc)";
extInGrad_ = MKLDNNMatrix::create(extInVal_->getPrimitiveDesc(), inMat);
CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD);
extInGrads_[idx] =
MKLDNNMatrix::create(extInVals_[idx]->getPrimitiveDesc(), inMat);
CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD);
in = MKLDNNMatrix::create(intPD);
cvtInGrad_ = MKLDNNMatrix::createReorder(in, extInGrad_);
CHECK(cvtInGrad_);
cvtInGrads_[idx] = MKLDNNMatrix::createReorder(in, extInGrads_[idx]);
CHECK(cvtInGrads_[idx]);
}
void MKLDNNLayer::resetOutGrad(MKLDNNMatrixPtr& out,
......
......@@ -34,15 +34,16 @@ typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr;
*/
class MKLDNNLayer : public Layer {
protected:
// input value element count
size_t inputElemenCnt_;
// batch size
int bs_;
// these sizes are always taken from the first input layer
// input image channel, height and width
int ic_, ih_, iw_;
// output image channel, height and width
int oc_, oh_, ow_;
// the condition that forward need be reset
size_t condition_;
// backward also need reset after reset forward handle
bool needResetBwd_;
......@@ -67,18 +68,18 @@ protected:
* When all layers are mkldnn layers, they could save internal data.
*/
// below MKLDNNMatrix buffers are all internal buffers
MKLDNNMatrixPtr inVal_;
MKLDNNMatrixPtr inGrad_;
std::vector<MKLDNNMatrixPtr> inVals_;
std::vector<MKLDNNMatrixPtr> inGrads_;
MKLDNNMatrixPtr outVal_;
MKLDNNMatrixPtr outGrad_;
// below are external value and grad
MKLDNNMatrixPtr extInVal_;
MKLDNNMatrixPtr extInGrad_;
std::vector<MKLDNNMatrixPtr> extInVals_;
std::vector<MKLDNNMatrixPtr> extInGrads_;
MKLDNNMatrixPtr extOutVal_;
MKLDNNMatrixPtr extOutGrad_;
// convert handle between external and internal buffers
std::shared_ptr<mkldnn::reorder> cvtInVal_;
std::shared_ptr<mkldnn::reorder> cvtInGrad_;
std::vector<std::shared_ptr<mkldnn::reorder>> cvtInVals_;
std::vector<std::shared_ptr<mkldnn::reorder>> cvtInGrads_;
std::shared_ptr<mkldnn::reorder> cvtOutVal_;
std::shared_ptr<mkldnn::reorder> cvtOutGrad_;
......@@ -102,14 +103,7 @@ protected:
public:
explicit MKLDNNLayer(const LayerConfig& config)
: Layer(config),
inputElemenCnt_(0),
bs_(0),
ic_(0),
ih_(0),
iw_(0),
oc_(0),
oh_(0),
ow_(0),
condition_(0),
needResetBwd_(true),
outputOnlyMKLDNN_(false),
engine_(mkldnn::engine::cpu, 0),
......@@ -125,31 +119,28 @@ public:
virtual void backward(const UpdateCallback& callback);
/**
* reshape the input image sizes
* and reset output image and buffer size
* output channel can not be changed
* reshape the input and output channels and image sizes
* and reset output buffer size
*/
virtual void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) = 0;
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) = 0;
/**
* reset the mkldnn forward primitive and memories
* it is only called when the input size changes
* weight and bias buffers should be covered by the child class itself
*/
virtual void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) = 0;
/**
* reset the mkldnn backward primitive and memories
* it is only called when needed
* weight and bias buffers should be covered by the child class itself
*/
virtual void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) = 0;
/**
......@@ -175,13 +166,19 @@ public:
void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); }
protected:
/**
* Some layers may have different condition to reset the forward.
* The function returns the condition that do not need reset forward.
*/
inline virtual size_t keepCondition() {
// reset when the first input element size changed, not only the batchsize
return inputLayers_[0]->getOutputValue()->getElementCnt();
}
/**
* reshape the input image sizes and input batchsize
*/
void reshapeInput(int& batchsize,
int& height,
int& width,
size_t inputIdx = 0);
void reshapeInput(int& batchsize, int& height, int& width, size_t idx = 0);
/**
* reshape output image sizes
......@@ -199,11 +196,13 @@ protected:
/**
* reset input value from input MKLDNNMatrix and internal primitive desc.
* reset both internal and external buffer and create reorder if necessary.
* input channel may be different in concat.
*/
void resetInValue(
MKLDNNMatrixPtr& in,
const std::shared_ptr<mkldnn::memory::primitive_desc>& intPD = nullptr,
size_t inputIdx = 0);
size_t idx = 0,
int inputChannel = 0);
/**
* reset output value from internal primitive desc.
......@@ -218,7 +217,7 @@ protected:
*/
void resetInGrad(MKLDNNMatrixPtr& in,
mkldnn::memory::primitive_desc intPD,
size_t inputIdx = 0);
size_t idx = 0);
/**
* reset output grad from internal primitive desc.
......@@ -296,17 +295,19 @@ protected:
* print the mkldnn memory format of value
*/
virtual void printValueFormat() {
if (extInVal_) {
VLOG(MKLDNN_FMTS) << extInVal_->getFormat() << " >>> ";
}
if (inVal_) {
VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>>";
for (size_t i = 0; i < inVals_.size(); ++i) {
if (!inVals_[i]) {
continue;
}
VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
<< ": " << (extInVals_[i] ? extInVals_[i]->getFormat()
: inVals_[i]->getFormat())
<< " >>> " << inVals_[i]->getFormat() << " >>>";
}
if (outVal_) {
VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
}
if (extOutVal_) {
VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "
<< (extOutVal_ ? extOutVal_->getFormat()
: outVal_->getFormat());
}
if (wgtVal_) {
VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat();
......@@ -320,17 +321,19 @@ protected:
* print the mkldnn memory format of grad
*/
virtual void printGradFormat() {
if (extOutGrad_) {
VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
}
if (outGrad_) {
VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< ";
VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "
<< (extOutGrad_ ? extOutGrad_->getFormat()
: outGrad_->getFormat());
}
if (inGrad_) {
VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<<";
}
if (extInGrad_) {
VLOG(MKLDNN_FMTS) << extInGrad_->getFormat() << " <<< ";
for (size_t i = 0; i < inGrads_.size(); ++i) {
if (!inGrads_[i]) {
continue;
}
VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
<< ": " << (extInGrads_[i] ? extInGrads_[i]->getFormat()
: inGrads_[i]->getFormat())
<< " <<< " << inGrads_[i]->getFormat() << " <<<";
}
if (wgtGrad_) {
VLOG(MKLDNN_FMTS) << "Weight grad format: " << wgtGrad_->getFormat();
......@@ -437,6 +440,41 @@ private:
outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
}
}
void prepareValueConversions(std::vector<mkldnn::primitive>& pipeline) {
  // The output value of an MKLDNNLayer should be an MKLDNNMatrix, so the
  // external output value is required. An external input value is not
  // required, since the input may be an mkldnn internal buffer.
  CHECK(extOutVal_) << "external output value is necessary";
  output_.value = std::dynamic_pointer_cast<Matrix>(extOutVal_);
  CHECK(inVals_[0] && outVal_) << "internal memories are necessary";

  // Every existing input reorder is placed at the front of the pipeline.
  for (const auto& inputReorder : cvtInVals_) {
    if (!inputReorder) {
      continue;
    }
    pipeline.insert(pipeline.begin(), *inputReorder);
  }

  // The output reorder, when present, goes at the end of the pipeline.
  if (cvtOutVal_) {
    pipeline.push_back(*cvtOutVal_);
  }
}
void prepareGradConversions(std::vector<mkldnn::primitive>& pipeline) {
  // Only the internal output grad is mandatory; the external output grad may
  // be absent, since the output may be an mkldnn internal buffer or the
  // grads may be merged directly.
  CHECK(outGrad_) << "internal output grad is necessary";
  if (extOutGrad_) {
    CHECK_EQ(extOutGrad_->getData(), output_.grad->getData())
        << "the external buffer should share the same data with output_.grad";
  }

  // The output grad reorder, when present, goes to the front of the pipeline.
  if (cvtOutGrad_) {
    pipeline.insert(pipeline.begin(), *cvtOutGrad_);
  }

  // Every existing input grad reorder is appended at the end.
  for (const auto& inputGradReorder : cvtInGrads_) {
    if (!inputGradReorder) {
      continue;
    }
    pipeline.push_back(*inputGradReorder);
  }
}
};
} // namespace paddle
......@@ -58,10 +58,11 @@ bool MKLDNNPoolLayer::init(const LayerMap& layerMap,
}
void MKLDNNPoolLayer::reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
reshapeInput(bs, ih, iw);
// ic_ and oc can not be changed
CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic)
CHECK_EQ((size_t)ic,
inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw)
<< "Input channel can not be changed";
// cal output sizes
......@@ -74,29 +75,25 @@ void MKLDNNPoolLayer::reshape(
}
/**
 * Reset the forward pass of the pooling layer.
 *
 * Only the first entry of inputs is used: it is passed, together with out,
 * to resetFwdBuffers, resetFwdPD (which fills fwdPD_) and resetFwdPipeline,
 * in that order.
 *
 * @param pipeline  primitive list handed to resetFwdPipeline
 * @param inputs    per-input internal values; inputs[0] must exist
 * @param out       internal output value
 */
void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
                               std::vector<MKLDNNMatrixPtr>& inputs,
                               MKLDNNMatrixPtr& out) {
  resetFwdBuffers(inputs[0], out);

  resetFwdPD(fwdPD_, inputs[0], out);

  resetFwdPipeline(pipeline, fwdPD_, inputs[0], out);
}
/**
 * Reset the backward pass of the pooling layer.
 *
 * Only the first entry of inputs is used: it is passed, together with out,
 * to resetBwdBuffers, resetBwdPD and resetBwdPipeline, in that order. The
 * backward primitive desc is a local that lives only for this call.
 *
 * @param pipeline  primitive list handed to resetBwdPipeline
 * @param inputs    per-input internal grads; inputs[0] must exist
 * @param out       internal output grad
 */
void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
                               std::vector<MKLDNNMatrixPtr>& inputs,
                               MKLDNNMatrixPtr& out) {
  std::shared_ptr<pool_bwd::primitive_desc> pd;

  resetBwdBuffers(inputs[0], out);

  resetBwdPD(pd, inputs[0], out);

  resetBwdPipeline(pipeline, pd, inputs[0], out);
}
void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
......@@ -151,9 +148,9 @@ void MKLDNNPoolLayer::resetFwdPipeline(
/**
 * Reset the backward buffers of the pooling layer.
 *
 * Requires the first internal input value and the internal output value to
 * be set, then resets the output grad with the output value's primitive
 * desc and the input grad with the first input value's primitive desc.
 *
 * @param in   receives the input grad
 * @param out  receives the output grad
 */
void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                      MKLDNNMatrixPtr& out) {
  CHECK(inVals_[0] && outVal_);
  resetOutGrad(out, outVal_->getPrimitiveDesc());
  resetInGrad(in, inVals_[0]->getPrimitiveDesc());
}
void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
......
......@@ -53,18 +53,14 @@ public:
const ParameterMap& parameterMap) override;
void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) override;
void printSizeInfo() override {
......@@ -75,11 +71,6 @@ public:
}
protected:
/**
* Forward functions: reset buffers(input, output),
* reset primitive descriptor,
* reset pipeline.
*/
void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out);
void resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr in,
......@@ -88,12 +79,6 @@ protected:
std::shared_ptr<pool_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out);
/**
* Backward functions: reset buffers(input, output),
* reset primitive descriptor,
* reset pipeline.
*/
void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out);
void resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册