提交 f2317b67 编写于 作者: T tensor-tang

Split resetFwd and resetBwd into separate sub-functions

上级 66fdbd0c
...@@ -18,9 +18,6 @@ limitations under the License. */ ...@@ -18,9 +18,6 @@ limitations under the License. */
using namespace mkldnn; // NOLINT using namespace mkldnn; // NOLINT
typedef memory::format format; typedef memory::format format;
typedef convolution_forward conv_fwd;
typedef convolution_backward_weights conv_bwdWgt;
typedef convolution_backward_data conv_bwdData;
namespace paddle { namespace paddle {
...@@ -114,237 +111,396 @@ void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline, ...@@ -114,237 +111,396 @@ void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear(); resetFwdPD(fwdPD_);
bool hasBias = biases_ && biases_->getW();
biasVal_ = nullptr; resetFwdBuffers(fwdPD_, in, wgt, bias, out);
resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
printValueFormatFlow();
}
/**
 * Rebuild the whole backward pass of this layer.
 *
 * The work is delegated, in order, to: the backward-weights primitive
 * descriptor, the backward-data primitive descriptor, the gradient buffers,
 * and finally the primitive pipeline. The gradient format flow is printed
 * once everything has been rebuilt.
 *
 * @param pipeline receives the backward primitives.
 * @param in       receives the input gradient matrix.
 * @param wgt      receives the weight gradient matrix.
 * @param bias     receives the bias gradient matrix.
 * @param out      receives the output gradient matrix.
 */
void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
                               MKLDNNMatrixPtr& in,
                               MKLDNNMatrixPtr& wgt,
                               MKLDNNMatrixPtr& bias,
                               MKLDNNMatrixPtr& out) {
  // Both descriptors only live for the duration of this reset.
  std::shared_ptr<conv_bwdWgt::primitive_desc> weightsDesc;
  std::shared_ptr<conv_bwdData::primitive_desc> dataDesc;

  resetBwdWgtPD(weightsDesc);
  // dataDesc is left empty when the input layer has no gradient.
  resetBwdDataPD(dataDesc);

  resetBwdBuffers(weightsDesc, dataDesc, in, wgt, bias, out);
  resetBwdPipeline(pipeline, weightsDesc, dataDesc, in, wgt, bias, out);

  printGradFormatFlow();
}
/**
 * Point cpuInVal_ at the data of the current CPU-device input value
 * of input 0.
 */
void MKLDNNConvLayer::updateInputData() {
  cpuInVal_->setData(
      getInputValue(0, CPU_DEVICE)->getData());
}
/**
 * Run an incremental update on the weight parameter and, when the layer
 * has a bias with a gradient, on the bias parameter as well.
 *
 * @param callback update callback forwarded to incUpdate().
 */
void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) {
  weight_->getParameterPtr()->incUpdate(callback);

  // Nothing more to do without a bias gradient.
  if (!(biases_ && biases_->getWGrad())) {
    return;
  }
  biases_->getParameterPtr()->incUpdate(callback);
}
/**
 * Fill in the dims that describe this convolution.
 *
 * @param wgt      weight dims: {oc, ic, fh, fw} for a single group,
 *                 {gp, oc/gp, ic/gp, fh, fw} otherwise.
 * @param bias     bias dims: {oc}.
 * @param stride   {sh, sw}.
 * @param dilation {dh - 1, dw - 1}.
 * @param padL     {ph, pw}.
 * @param padR     value returned by getPaddingR().
 */
void MKLDNNConvLayer::loadConvSettings(memory::dims& wgt,
                                       memory::dims& bias,
                                       memory::dims& stride,
                                       memory::dims& dilation,
                                       memory::dims& padL,
                                       memory::dims& padR) {
  if (gp_ == 1) {
    wgt = memory::dims{oc_, ic_, fh_, fw_};
  } else {
    // grouped convolution carries the group count as the leading dim
    wgt = memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_};
  }

  bias = memory::dims{oc_};
  stride = memory::dims{sh_, sw_};
  padL = memory::dims{ph_, pw_};
  padR = getPaddingR();

  // mkldnn counts dilation from 0, hence the -1 on each axis
  dilation = memory::dims{dh_ - 1, dw_ - 1};
}
void MKLDNNConvLayer::resetFwdPD(
std::shared_ptr<conv_fwd::primitive_desc>& pd) {
// dims for conv // dims for conv
memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_}; memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_};
memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
memory::dims wgtDims = memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
(gp_ == 1) ? memory::dims{oc_, ic_, fh_, fw_} loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
: memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_};
memory::dims biasDims = memory::dims{oc_};
memory::dims strides = {sh_, sw_};
// note: mkldnn dilation start from 0
memory::dims dilations = {dh_ - 1, dw_ - 1};
memory::dims padding = {ph_, pw_};
memory::dims padR = getPaddingR();
// create forward handle prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring
prop_kind pk = : prop_kind::forward_training;
passType_ == PASS_TEST ? prop_kind::forward : prop_kind::forward_training;
algorithm algo = algorithm::convolution_direct; algorithm algo = algorithm::convolution_direct;
padding_kind padKind = padding_kind::zero; padding_kind padKind = padding_kind::zero;
conv_fwd::desc fwdDesc = conv_fwd::desc fwdDesc =
hasBias ? conv_fwd::desc(pk, biases_ && biases_->getW()
algo, ? conv_fwd::desc(pk,
MKLDNNMatrix::createMemoryDesc(inDims), algo,
MKLDNNMatrix::createMemoryDesc(wgtDims), MKLDNNMatrix::createMemoryDesc(inDims),
MKLDNNMatrix::createMemoryDesc(biasDims), MKLDNNMatrix::createMemoryDesc(wgtDims),
MKLDNNMatrix::createMemoryDesc(outDims), MKLDNNMatrix::createMemoryDesc(biasDims),
strides, MKLDNNMatrix::createMemoryDesc(outDims),
dilations, strides,
padding, dilations,
padR, padL,
padKind) padR,
: conv_fwd::desc(pk, padKind)
algo, : conv_fwd::desc(pk,
MKLDNNMatrix::createMemoryDesc(inDims), algo,
MKLDNNMatrix::createMemoryDesc(wgtDims), MKLDNNMatrix::createMemoryDesc(inDims),
MKLDNNMatrix::createMemoryDesc(outDims), MKLDNNMatrix::createMemoryDesc(wgtDims),
strides, MKLDNNMatrix::createMemoryDesc(outDims),
dilations, strides,
padding, dilations,
padR, padL,
padKind); padR,
fwdPD_.reset(new conv_fwd::primitive_desc(fwdDesc, engine_)); padKind);
pd.reset(new conv_fwd::primitive_desc(fwdDesc, engine_));
// create mkldnn matrix }
const MatrixPtr& wgtVal = weight_->getW();
const MatrixPtr& inVal = inputLayers_[0]->getOutput().value; void MKLDNNConvLayer::resetFwdBuffers(
const MatrixPtr& outVal = output_.value; std::shared_ptr<conv_fwd::primitive_desc>& pd,
wgt = MKLDNNMatrix::create(wgtVal, fwdPD_->weights_primitive_desc()); MKLDNNMatrixPtr& in,
in = MKLDNNMatrix::create(inVal, fwdPD_->src_primitive_desc()); MKLDNNMatrixPtr& wgt,
out = MKLDNNMatrix::create(outVal, fwdPD_->dst_primitive_desc()); MKLDNNMatrixPtr& bias,
VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat(); MKLDNNMatrixPtr& out) {
if (hasBias) { CHECK(pd);
const MatrixPtr& biasVal = biases_->getW(); resetInValue(pd, in);
bias = MKLDNNMatrix::create(biasVal, biasDims, format::x, engine_);
CHECK(bias->getPrimitiveDesc() == fwdPD_->bias_primitive_desc()) resetWgtBiasValue(pd, wgt, bias);
<< "bias primitive desc should always be equal";
resetOutValue(pd, out);
}
/**
 * Rebuild the forward pipeline.
 *
 * Order of the primitives: the input reorder (only if cvtInVal_ is set),
 * the forward convolution, the output reorder (only if cvtOutVal_ is set).
 *
 * @param pipeline cleared, then filled with the forward primitives.
 * @param pd       forward primitive descriptor used to create the conv.
 * @param in       input value.
 * @param wgt      weight value.
 * @param bias     bias value; when empty the conv is created without bias.
 * @param out      output value.
 */
void MKLDNNConvLayer::resetFwdPipeline(
    std::vector<primitive>& pipeline,
    std::shared_ptr<conv_fwd::primitive_desc>& pd,
    MKLDNNMatrixPtr& in,
    MKLDNNMatrixPtr& wgt,
    MKLDNNMatrixPtr& bias,
    MKLDNNMatrixPtr& out) {
  pipeline.clear();

  if (cvtInVal_) {
    pipeline.push_back(*cvtInVal_);
  }

  // the forward handle itself, with or without bias
  fwd_.reset(bias ? new conv_fwd(*pd, *in, *wgt, *bias, *out)
                  : new conv_fwd(*pd, *in, *wgt, *out));
  pipeline.push_back(*fwd_);

  if (cvtOutVal_) {
    pipeline.push_back(*cvtOutVal_);
  }
}
// add reorder if input value do not match void MKLDNNConvLayer::resetInValue(
std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) {
const MatrixPtr& inMat = inputLayers_[0]->getOutput().value;
in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc());
// create buffer and reorder if input value do not match
cpuInVal_ = nullptr;
cvtInVal_ = nullptr;
if (inputIsOnlyMKLDNN()) { if (inputIsOnlyMKLDNN()) {
MKLDNNMatrixPtr dnnIn = std::dynamic_pointer_cast<MKLDNNMatrix>(inVal); MKLDNNMatrixPtr dnnIn = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
CHECK(dnnIn) << "Input should be MKLDNNMatrix"; CHECK(dnnIn) << "Input should be MKLDNNMatrix";
if (dnnIn->getPrimitiveDesc() != in->getPrimitiveDesc()) { if (dnnIn->getPrimitiveDesc() != in->getPrimitiveDesc()) {
CHECK_EQ(dnnIn->getFormat(), format::nc); CHECK_EQ(dnnIn->getFormat(), format::nc);
CHECK(ih_ == 1 && iw_ == 1); CHECK(ih_ == 1 && iw_ == 1) << "when input is nc format";
dnnIn = MKLDNNMatrix::create(inVal, inDims, format::nchw, engine_); // create a new one with nchw format and same data
memory::dims inDims = memory::dims{bs_, ic_, 1, 1};
dnnIn = MKLDNNMatrix::create(inMat, inDims, format::nchw, engine_);
CHECK(dnnIn->getPrimitiveDesc() == in->getPrimitiveDesc()); CHECK(dnnIn->getPrimitiveDesc() == in->getPrimitiveDesc());
} }
in = dnnIn; in = dnnIn;
} else { } else {
const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE); const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE);
memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_};
cpuInVal_ = MKLDNNMatrix::create(cpuIn, inDims, format::nchw, engine_); cpuInVal_ = MKLDNNMatrix::create(cpuIn, inDims, format::nchw, engine_);
if (cpuInVal_->getPrimitiveDesc() != in->getPrimitiveDesc()) { if (cpuInVal_->getPrimitiveDesc() != in->getPrimitiveDesc()) {
// create new mkldnn matrix // create new mkldnn matrix
in = MKLDNNMatrix::create(nullptr, fwdPD_->src_primitive_desc()); in = MKLDNNMatrix::create(nullptr, pd->src_primitive_desc());
cvtInVal_ = MKLDNNMatrix::createReorder(cpuInVal_, in); cvtInVal_ = MKLDNNMatrix::createReorder(cpuInVal_, in);
CHECK(cvtInVal_); CHECK(cvtInVal_) << "should not be emptry";
pipeline.push_back(*cvtInVal_);
} else { } else {
in = cpuInVal_; in = cpuInVal_;
} }
} }
}
// add fwd handle void MKLDNNConvLayer::resetWgtBiasValue(
if (hasBias) { std::shared_ptr<conv_fwd::primitive_desc>& pd,
fwd_.reset(new conv_fwd(*fwdPD_, *in, *wgt, *bias, *out)); MKLDNNMatrixPtr& wgt,
} else { MKLDNNMatrixPtr& bias) {
fwd_.reset(new conv_fwd(*fwdPD_, *in, *wgt, *out)); wgt = MKLDNNMatrix::create(weight_->getW(), pd->weights_primitive_desc());
VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat();
bias = nullptr;
if (biases_ && biases_->getW()) {
bias = MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc());
} }
pipeline.push_back(*fwd_); }
/**
 * Reset the MKLDNN matrix of the output value.
 *
 * `out` is created from output_.value with the destination primitive desc
 * of `pd`, and output_.value is replaced by it. When the output is not
 * consumed only by MKLDNN layers, the CPU-device output is wrapped as an
 * nchw matrix; a reorder from `out` to it is created if the primitive descs
 * differ, otherwise the CPU output shares the data of `out`.
 *
 * @param pd  forward primitive descriptor.
 * @param out receives the output value matrix.
 */
void MKLDNNConvLayer::resetOutValue(
    std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& out) {
  out = MKLDNNMatrix::create(output_.value, pd->dst_primitive_desc());

  // change original output value from cpu matrix to mkldnn matrix
  output_.value = std::dynamic_pointer_cast<Matrix>(out);

  // create reorder if output value has cpu device and pd do not match
  cpuOutVal_ = nullptr;
  // Clear the reorder too: resetFwdPipeline pushes cvtOutVal_ whenever it is
  // set, so a leftover from a previous reset must not survive this one.
  cvtOutVal_ = nullptr;
  if (!outputIsOnlyMKLDNN()) {
    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
    memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
    cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
    if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
      cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
      CHECK(cvtOutVal_) << "should not be emptry";
    } else {
      // CPU output share the same data of MKLDNN output
      cpuOut->setData(out->getData());
      cpuOutVal_ = out;
    }
  }
}
void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline, void MKLDNNConvLayer::resetBwdWgtPD(
MKLDNNMatrixPtr& in, std::shared_ptr<conv_bwdWgt::primitive_desc>& pd) {
MKLDNNMatrixPtr& wgt, memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
MKLDNNMatrixPtr& bias, loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
MKLDNNMatrixPtr& out) {
pipeline.clear();
bool hasBias = biases_ && biases_->getWGrad();
/// backward weight // create backward weight using input, output and weight value memory desc
CHECK(inVal_) << "Should have input value"; CHECK(inVal_) << "Should have input value";
CHECK(outVal_) << "Should have output value"; CHECK(outVal_) << "Should have output value";
CHECK(wgtVal_) << "Should have weight value"; CHECK(wgtVal_) << "Should have weight value";
memory::dims wgtDims =
(gp_ == 1) ? memory::dims{oc_, ic_, fh_, fw_}
: memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_};
memory::dims strides = {sh_, sw_};
memory::dims dilations = {dh_ - 1, dw_ - 1};
memory::dims padding = {ph_, pw_};
memory::dims padR = getPaddingR();
// create backward handle
algorithm algo = algorithm::convolution_direct; algorithm algo = algorithm::convolution_direct;
padding_kind padKind = padding_kind::zero; padding_kind padKind = padding_kind::zero;
auto bwdWgtDesc = auto bwdWgtDesc = biasVal_ != nullptr
hasBias ? conv_bwdWgt::desc(algo, ? conv_bwdWgt::desc(algo,
inVal_->getMemoryDesc(), inVal_->getMemoryDesc(),
MKLDNNMatrix::createMemoryDesc(wgtDims), wgtVal_->getMemoryDesc(),
biasVal_->getMemoryDesc(), biasVal_->getMemoryDesc(),
outVal_->getMemoryDesc(), outVal_->getMemoryDesc(),
strides, strides,
padding, padL,
padR, padR,
padKind) padKind)
: conv_bwdWgt::desc(algo, : conv_bwdWgt::desc(algo,
inVal_->getMemoryDesc(), inVal_->getMemoryDesc(),
MKLDNNMatrix::createMemoryDesc(wgtDims), wgtVal_->getMemoryDesc(),
outVal_->getMemoryDesc(), outVal_->getMemoryDesc(),
strides, strides,
padding, padL,
padR, padR,
padKind); padKind);
pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_));
auto bwdWgtPD = conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_); CHECK(pd->src_primitive_desc() == inVal_->getPrimitiveDesc())
CHECK(bwdWgtPD.src_primitive_desc() == inVal_->getPrimitiveDesc())
<< "primitive desc of in value should equal"; << "primitive desc of in value should equal";
CHECK(bwdWgtPD.diff_dst_primitive_desc() == outVal_->getPrimitiveDesc()) CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc())
<< "primitive desc of out grad should equal the out value"; << "primitive desc of out grad should equal the out value";
CHECK(bwdWgtPD.diff_weights_primitive_desc() == wgtVal_->getPrimitiveDesc()) CHECK(pd->diff_weights_primitive_desc() == wgtVal_->getPrimitiveDesc())
<< "primitive desc of weight grad should equal the weight value"; << "primitive desc of weight grad should equal the weight value";
}
// create mkldnn matrix void MKLDNNConvLayer::resetBwdDataPD(
const MatrixPtr& wgtGrad = weight_->getWGrad(); std::shared_ptr<conv_bwdData::primitive_desc>& pd) {
const MatrixPtr& outGrad = output_.grad; if (inputLayers_[0]->getOutput().grad == nullptr) {
wgt = MKLDNNMatrix::create(wgtGrad, bwdWgtPD.diff_weights_primitive_desc()); return;
out = MKLDNNMatrix::create(outGrad, bwdWgtPD.diff_dst_primitive_desc());
CHECK(wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc())
<< "primitive desc of weight grad and value should be equal";
CHECK(out->getPrimitiveDesc() == outVal_->getPrimitiveDesc())
<< "primitive desc of out grad and value should be equal";
VLOG(MKLDNN_FMTS) << "Backward weight, weight grad format: "
<< wgt->getFormat();
if (hasBias) {
const MatrixPtr& biasGrad = biases_->getWGrad();
bias = MKLDNNMatrix::create(biasGrad, bwdWgtPD.diff_bias_primitive_desc());
CHECK(bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc())
<< "primitive desc of bias grad should equal the bias value";
} }
memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
CHECK(inVal_) << "Should have input value";
CHECK(outVal_) << "Should have output value";
// create backward data using input and output value memory desc
// but using weight memory desc with any format
auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct,
inVal_->getMemoryDesc(),
MKLDNNMatrix::createMemoryDesc(wgtDims),
outVal_->getMemoryDesc(),
strides,
padL,
padR,
padding_kind::zero);
pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_));
CHECK(pd->diff_src_primitive_desc() == inVal_->getPrimitiveDesc())
<< "primitive desc of in grad should equal the in value";
CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc())
<< "primitive desc of out grad should equal";
}
/**
 * Reset every MKLDNN matrix used by the backward pass.
 *
 * The backward-weights descriptor is mandatory and is checked first. The
 * buffers are then reset in this order: output grad, weight and bias grad,
 * input grad, weight value for backward data.
 *
 * @param wgtPD  backward-weights primitive descriptor; must not be empty.
 * @param dataPD backward-data primitive descriptor, forwarded to the
 *               input-grad and weight-value helpers.
 * @param in     receives the input gradient matrix.
 * @param wgt    receives the weight gradient matrix.
 * @param bias   receives the bias gradient matrix.
 * @param out    receives the output gradient matrix.
 */
void MKLDNNConvLayer::resetBwdBuffers(
    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
    MKLDNNMatrixPtr& in,
    MKLDNNMatrixPtr& wgt,
    MKLDNNMatrixPtr& bias,
    MKLDNNMatrixPtr& out) {
  CHECK(wgtPD);

  // buffers driven by the backward-weights descriptor
  resetOutGrad(wgtPD, out);
  resetWgtBiasGrad(wgtPD, wgt, bias);

  // buffers driven by the backward-data descriptor
  resetInGrad(dataPD, in);
  resetWgtValBwdData(dataPD, wgtValBwdData_);
}
/**
 * Rebuild the backward pipeline.
 *
 * Order of the primitives: the output-grad reorder (if cvtOutGrad_ is set)
 * and the backward-weights convolution; then, only when a backward-data
 * descriptor exists, the weight-value reorder (if cvtWgtVal_ is set), the
 * backward-data convolution and the input-grad reorder (if cvtInGrad_ is
 * set).
 *
 * @param pipeline cleared, then filled with the backward primitives.
 * @param wgtPD    backward-weights primitive descriptor.
 * @param dataPD   backward-data primitive descriptor; when empty the
 *                 backward-data part is skipped.
 * @param in       input gradient.
 * @param wgt      weight gradient.
 * @param bias     bias gradient; when empty the conv is created without it.
 * @param out      output gradient.
 */
void MKLDNNConvLayer::resetBwdPipeline(
    std::vector<primitive>& pipeline,
    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
    MKLDNNMatrixPtr& in,
    MKLDNNMatrixPtr& wgt,
    MKLDNNMatrixPtr& bias,
    MKLDNNMatrixPtr& out) {
  pipeline.clear();

  if (cvtOutGrad_) {
    pipeline.push_back(*cvtOutGrad_);
  }

  // backward weights handle, with or without bias gradient
  bwdWgt_.reset(bias ? new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias)
                     : new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt));
  pipeline.push_back(*bwdWgt_);

  if (dataPD != nullptr) {
    if (cvtWgtVal_) {
      pipeline.push_back(*cvtWgtVal_);
    }

    // backward data handle
    CHECK(wgtValBwdData_) << "Should have weight memory";
    bwdData_.reset(new conv_bwdData(*dataPD, *out, *wgtValBwdData_, *in));
    pipeline.push_back(*bwdData_);

    if (cvtInGrad_) {
      pipeline.push_back(*cvtInGrad_);
    }
  }
}
/**
 * Reset the MKLDNN matrix of the output gradient.
 *
 * `out` is created from output_.grad with the diff-dst primitive desc of
 * `wgtPD` and must have the same primitive desc as outVal_. When the output
 * is not consumed only by MKLDNN layers, the CPU-device gradient is wrapped
 * with the primitive desc of cpuOutVal_; if that matches `out` the data is
 * shared, otherwise a reorder from the CPU gradient to `out` is created.
 *
 * @param wgtPD backward-weights primitive descriptor.
 * @param out   receives the output gradient matrix.
 */
void MKLDNNConvLayer::resetOutGrad(
    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) {
  const MatrixPtr& outMat = output_.grad;
  out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc());
  CHECK(outVal_ != nullptr &&
        out->getPrimitiveDesc() == outVal_->getPrimitiveDesc())
      << "primitive desc of out grad and value should be equal";

  // TODO(TJ): merge outgrad
  // create reorder if the CPU output grad does not match
  cpuOutGrad_ = nullptr;
  cvtOutGrad_ = nullptr;
  if (!outputIsOnlyMKLDNN()) {
    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
    // the CPU grad uses the same primitive desc as cpuOutVal_
    CHECK(cpuOutVal_);
    cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
    if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) {
      // formats agree: share the CPU data instead of reordering
      outMat->setData(cpuOut->getData());
      out = cpuOutGrad_;
    } else {
      cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
      CHECK(cvtOutGrad_);
    }
  }
}
// add bwdWgt handle void MKLDNNConvLayer::resetWgtBiasGrad(
if (hasBias) { std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
bwdWgt_.reset(new conv_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt, *bias)); MKLDNNMatrixPtr& wgt,
} else { MKLDNNMatrixPtr& bias) {
bwdWgt_.reset(new conv_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt)); wgt = MKLDNNMatrix::create(weight_->getWGrad(),
} wgtPD->diff_weights_primitive_desc());
pipeline.push_back(*bwdWgt_); CHECK(nullptr != wgtVal_ &&
wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc())
<< "primitive desc of weight grad and value should be equal";
VLOG(MKLDNN_FMTS) << "weight grad format: " << wgt->getFormat();
/// backward data if (biasVal_ == nullptr) {
const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
if (inGrad == nullptr) {
return; return;
} }
bias = MKLDNNMatrix::create(biases_->getWGrad(),
wgtPD->diff_bias_primitive_desc());
CHECK(bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc())
<< "primitive desc of bias grad should equal the bias value";
}
auto bwdDataDesc = conv_bwdData::desc(algo, void MKLDNNConvLayer::resetInGrad(
inVal_->getMemoryDesc(), std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrix::createMemoryDesc(wgtDims), MKLDNNMatrixPtr& in) {
out->getMemoryDesc(), if (dataPD == nullptr) {
strides, return;
padding, }
padR,
padKind);
auto bwdDataPD = conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_);
CHECK(bwdDataPD.diff_src_primitive_desc() == inVal_->getPrimitiveDesc())
<< "primitive desc of in grad should equal the in value";
CHECK(bwdDataPD.diff_dst_primitive_desc() == out->getPrimitiveDesc())
<< "primitive desc of out grad should equal";
// create mkldnn matrix inGrad_ and reorder if necessary
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
in = MKLDNNMatrix::create(inGrad, bwdDataPD.diff_src_primitive_desc()); in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad,
dataPD->diff_src_primitive_desc());
CHECK(nullptr != inVal_ &&
in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
<< "primitive desc of input grad and value should be equal";
// create reorder if has output grad does not match
cpuInGrad_ = nullptr;
cvtInGrad_ = nullptr; cvtInGrad_ = nullptr;
if (!inputIsOnlyMKLDNN()) { if (!inputIsOnlyMKLDNN()) {
const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE); const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE);
...@@ -360,43 +516,28 @@ void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline, ...@@ -360,43 +516,28 @@ void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
in = cpuInGrad_; in = cpuInGrad_;
} }
} }
}
// create new weight value for backward data, and reorder if necessary void MKLDNNConvLayer::resetWgtValBwdData(
std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrixPtr& wgt) {
if (dataPD == nullptr) {
return;
}
// create new weight value for backward data, and create reorder if necessary
// since the primitive_desc would be different with wgtVal_ // since the primitive_desc would be different with wgtVal_
if (bwdDataPD.weights_primitive_desc() != wgtVal_->getPrimitiveDesc()) { CHECK(wgtVal_) << "should have weight value";
if (dataPD->weights_primitive_desc() != wgtVal_->getPrimitiveDesc()) {
wgtValBwdData_ = wgtValBwdData_ =
MKLDNNMatrix::create(nullptr, bwdDataPD.weights_primitive_desc()); MKLDNNMatrix::create(nullptr, dataPD->weights_primitive_desc());
cvtWgtVal_ = MKLDNNMatrix::createReorder(wgtVal_, wgtValBwdData_); cvtWgtVal_ = MKLDNNMatrix::createReorder(wgtVal_, wgtValBwdData_);
CHECK(cvtWgtVal_); CHECK(cvtWgtVal_);
pipeline.push_back(*cvtWgtVal_);
} else { } else {
wgtValBwdData_ = wgtVal_; wgtValBwdData_ = wgtVal_;
} }
VLOG(MKLDNN_FMTS) << "Backward data, weight value format: " VLOG(MKLDNN_FMTS) << "weight value format for backward data"
<< wgtValBwdData_->getFormat(); << wgtValBwdData_->getFormat();
// add bwdData handle
CHECK(wgtValBwdData_) << "Should have weight memory";
bwdData_.reset(new conv_bwdData(bwdDataPD, *out, *wgtValBwdData_, *in));
pipeline.push_back(*bwdData_);
// add ingrad reorder after bwdData
if (cvtInGrad_) {
pipeline.push_back(*cvtInGrad_);
}
printGradFormatFlow();
}
void MKLDNNConvLayer::updateInputData() {
cpuInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
}
void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) {
weight_->getParameterPtr()->incUpdate(callback);
if (biases_ && biases_->getWGrad()) {
biases_->getParameterPtr()->incUpdate(callback);
}
} }
} // namespace paddle } // namespace paddle
...@@ -18,6 +18,9 @@ limitations under the License. */ ...@@ -18,6 +18,9 @@ limitations under the License. */
#include "mkldnn.hpp" #include "mkldnn.hpp"
namespace paddle { namespace paddle {
// Short aliases for the MKL-DNN convolution primitives used by this layer.
using conv_fwd = mkldnn::convolution_forward;
using conv_bwdWgt = mkldnn::convolution_backward_weights;
using conv_bwdData = mkldnn::convolution_backward_data;
/** /**
* @brief A subclass of MKLDNNLayer conv layer. * @brief A subclass of MKLDNNLayer conv layer.
...@@ -43,7 +46,7 @@ protected: ...@@ -43,7 +46,7 @@ protected:
std::shared_ptr<mkldnn::reorder> cvtWgtVal_; std::shared_ptr<mkldnn::reorder> cvtWgtVal_;
// save forward primitive_desc, which can be used backward // save forward primitive_desc, which can be used backward
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> fwdPD_; std::shared_ptr<conv_fwd::primitive_desc> fwdPD_;
// MKLDNNMatrixPtr which should be created from CPU Device // MKLDNNMatrixPtr which should be created from CPU Device
MKLDNNMatrixPtr cpuInVal_; MKLDNNMatrixPtr cpuInVal_;
...@@ -99,7 +102,6 @@ public: ...@@ -99,7 +102,6 @@ public:
void convertWeightsToPaddle() override; void convertWeightsToPaddle() override;
protected:
void printSizeInfo() override { void printSizeInfo() override {
MKLDNNLayer::printSizeInfo(); MKLDNNLayer::printSizeInfo();
VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_ VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_
...@@ -116,6 +118,7 @@ protected: ...@@ -116,6 +118,7 @@ protected:
VLOG(MKLDNN_FMTS) << " >>> " << cpuOutVal_->getFormat(); VLOG(MKLDNN_FMTS) << " >>> " << cpuOutVal_->getFormat();
} }
} }
void printGradFormatFlow() override { void printGradFormatFlow() override {
if (cpuInGrad_) { if (cpuInGrad_) {
VLOG(MKLDNN_FMTS) << cpuInGrad_->getFormat() << " <<<"; VLOG(MKLDNN_FMTS) << cpuInGrad_->getFormat() << " <<<";
...@@ -126,6 +129,107 @@ protected: ...@@ -126,6 +129,107 @@ protected:
} }
} }
protected:
/**
 * Load the dims that describe this convolution.
 *
 * @param wgt      weight dims (4-D for one group, 5-D with a leading
 *                 group dim otherwise).
 * @param bias     bias dims.
 * @param stride   stride dims.
 * @param dilation dilation dims, counted from 0 as mkldnn expects.
 * @param padL     left padding dims.
 * @param padR     right padding dims.
 */
void loadConvSettings(mkldnn::memory::dims& wgt,
mkldnn::memory::dims& bias,
mkldnn::memory::dims& stride,
mkldnn::memory::dims& dilation,
mkldnn::memory::dims& padL,
mkldnn::memory::dims& padR);
/**
 * Reset the forward primitive descriptor.
 *
 * @param pd receives the newly created forward primitive descriptor.
 */
void resetFwdPD(std::shared_ptr<conv_fwd::primitive_desc>& pd);
/**
 * Reset the MKLDNNMatrix buffers used in forward: input value, weight and
 * bias value, output value.
 *
 * @param pd   forward primitive descriptor; must not be empty.
 * @param in   receives the input value.
 * @param wgt  receives the weight value.
 * @param bias receives the bias value.
 * @param out  receives the output value.
 */
void resetFwdBuffers(std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
 * Reset the forward pipeline: optional input reorder, the forward
 * convolution, optional output reorder.
 *
 * @param pipeline cleared, then filled with the forward primitives.
 * @param pd       forward primitive descriptor.
 * @param in       input value.
 * @param wgt      weight value.
 * @param bias     bias value; the conv is created without bias when empty.
 * @param out      output value.
 */
void resetFwdPipeline(std::vector<mkldnn::primitive>& pipeline,
std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
 * Reset the MKLDNNMatrix of the input value, creating a CPU-side buffer
 * and a reorder when the input format does not match.
 *
 * @param pd forward primitive descriptor.
 * @param in receives the input value.
 */
void resetInValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in);
/**
 * Reset the MKLDNNMatrix of the weight and bias values.
 *
 * @param pd   forward primitive descriptor.
 * @param wgt  receives the weight value.
 * @param bias receives the bias value, or nullptr when there is no bias.
 */
void resetWgtBiasValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias);
/**
 * Reset the MKLDNNMatrix of the output value; the layer's output value is
 * replaced by the created matrix.
 *
 * @param pd  forward primitive descriptor.
 * @param out receives the output value.
 */
void resetOutValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& out);
/**
 * Reset the backward-weights primitive descriptor. Requires the input,
 * output and weight values to exist already.
 *
 * @param pd receives the newly created descriptor.
 */
void resetBwdWgtPD(std::shared_ptr<conv_bwdWgt::primitive_desc>& pd);
/**
 * Reset the backward-data primitive descriptor.
 *
 * @param pd receives the newly created descriptor; left untouched when the
 *           input layer has no gradient.
 */
void resetBwdDataPD(std::shared_ptr<conv_bwdData::primitive_desc>& pd);
/**
 * Reset the MKLDNNMatrix buffers used in backward: output grad, weight and
 * bias grad, input grad, and the weight value for backward data.
 *
 * @param wgtPD  backward-weights primitive descriptor; must not be empty.
 * @param dataPD backward-data primitive descriptor; may be empty.
 * @param in     receives the input gradient.
 * @param wgt    receives the weight gradient.
 * @param bias   receives the bias gradient.
 * @param out    receives the output gradient.
 */
void resetBwdBuffers(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
 * Reset the backward pipeline: optional output-grad reorder, the
 * backward-weights convolution and, when a backward-data descriptor exists,
 * the optional weight reorder, the backward-data convolution and the
 * optional input-grad reorder.
 *
 * @param pipeline cleared, then filled with the backward primitives.
 * @param wgtPD    backward-weights primitive descriptor.
 * @param dataPD   backward-data primitive descriptor; may be empty.
 * @param in       input gradient.
 * @param wgt      weight gradient.
 * @param bias     bias gradient; the conv is created without it when empty.
 * @param out      output gradient.
 */
void resetBwdPipeline(std::vector<mkldnn::primitive>& pipeline,
std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out);
/**
 * Reset the MKLDNNMatrix of the output gradient, sharing data with or
 * reordering from the CPU-device gradient when one exists.
 *
 * @param wgtPD backward-weights primitive descriptor.
 * @param out   receives the output gradient.
 */
void resetOutGrad(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
MKLDNNMatrixPtr& out);
/**
 * Reset the MKLDNNMatrix of the weight and bias gradients.
 *
 * @param wgtPD backward-weights primitive descriptor.
 * @param wgt   receives the weight gradient.
 * @param bias  receives the bias gradient when the layer has a bias value.
 */
void resetWgtBiasGrad(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias);
/**
 * Reset the MKLDNNMatrix of the input gradient.
 *
 * @param dataPD backward-data primitive descriptor; nothing is done when
 *               it is empty.
 * @param in     receives the input gradient.
 */
void resetInGrad(std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrixPtr& in);
/**
 * Reset the MKLDNNMatrix of the weight value for backward data, since its
 * primitive_desc may differ from the one of wgtVal_.
 *
 * @param dataPD backward-data primitive descriptor; nothing is done when
 *               it is empty.
 * @param wgt    weight value for backward data.
 */
void resetWgtValBwdData(std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrixPtr& wgt);
/** /**
* get padding_r according to * get padding_r according to
* https://github.com/01org/mkl-dnn/blob/master/tests/gtests/ * https://github.com/01org/mkl-dnn/blob/master/tests/gtests/
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册