Commit 698071cc authored by tensor-tang

share mkldnn output value data if the next layer is a cpu device

Parent 6715beaa
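The gist of this commit: MKLDNNLayer::forward() no longer unconditionally points output_.value at the internal MKLDNN matrix; each layer's resetOutValue() now decides which matrix to publish, so a following CPU layer that fetches its input through getOutputValue() always sees readable data. A minimal sketch of the resulting pattern, condensed from the conv and pool hunks below (hedged; the names are those in the diff, and the cast works because MKLDNNMatrix derives from Matrix in Paddle):

// Sketch only: publish whichever matrix the next layer can read.
// `out` is the MKLDNN-format output; `cpuOutVal_` is the nchw CPU view.
if (!outputIsOnlyMKLDNN()) {
  // some consumer is a CPU layer: expose the CPU-format matrix
  output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
} else {
  // all consumers are MKLDNN layers: expose the MKLDNN matrix directly
  output_.value = std::dynamic_pointer_cast<Matrix>(out);
}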
@@ -243,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline(
 void MKLDNNConvLayer::resetInValue(
     std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) {
-  const MatrixPtr& inMat = inputLayers_[0]->getOutput().value;
+  const MatrixPtr& inMat = inputLayers_[0]->getOutputValue();
   in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc());
   // create buffer and reorder if the input value does not match
@@ -308,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue(
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
     memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
     cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
-    if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
+    if (cpuOutVal_->getPrimitiveDesc() != pd->dst_primitive_desc()) {
+      out = MKLDNNMatrix::create(nullptr, pd->dst_primitive_desc());
       cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
-      CHECK(cvtOutVal_) << "should not be emptry";
+      CHECK(cvtOutVal_) << "should not be empty";
     } else {
-      // CPU output share the same data of MKLDNN output
-      cpuOut->setData(out->getData());
       cpuOutVal_ = out;
     }
+    // when the output is on CPU device, make the MKLDNN output value share
+    // the same data; then when the next layer calls getOutputValue() to
+    // fetch its input, it gets the right data
+    output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
+    return;
   }
+  output_.value = std::dynamic_pointer_cast<Matrix>(out);
 }

 void MKLDNNConvLayer::resetBwdWgtPD(
......
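In the conv hunk above, note the rebuilt reorder branch: when the primitive's destination format differs from nchw, `out` is recreated over an internal buffer (the nullptr matrix argument) and cvtOutVal_ converts it into cpuOutVal_; otherwise the two simply alias one buffer. In Paddle's MKL-DNN v0.x layers such a reorder is appended to the forward pipeline so it runs right after the convolution. A hedged sketch of that wiring (pipelineFwd_ is the layer's vector of mkldnn::primitive):

// sketch: queue the output reorder, if one was created
if (cvtOutVal_) {
  pipelineFwd_.push_back(*cvtOutVal_);  // reorder: out -> cpuOutVal_
}
// the whole pipeline is then submitted in forward(), e.g. with the
// eager stream of MKL-DNN v0.x:
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipelineFwd_).wait();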
@@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
 void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) {
   out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_);
   if (!outputIsOnlyMKLDNN()) {
-    // fc cpu output value do not need create convert
-    // just share point
+    // fc cpu output value does not need a convert; just share the data
     getOutput(CPU_DEVICE).value->setData(out->getData());
   }
+  output_.value = std::dynamic_pointer_cast<Matrix>(out);
 }

 void MKLDNNFcLayer::resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
......
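The fc layer, by contrast, needs no reorder because its output is already in plain nc format: the CPU-device matrix adopts the MKLDNN buffer through setData, after which both views alias one allocation and no copy ever happens. A hedged illustration (assuming Matrix::setData swaps only the underlying data pointer, as Paddle's Matrix does):

MatrixPtr cpuVal = getOutput(CPU_DEVICE).value;
cpuVal->setData(out->getData());              // share, don't copy
CHECK_EQ(cpuVal->getData(), out->getData());  // both see the same buffer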
@@ -127,10 +127,6 @@ public:
     pipelineFwd_.clear();
     reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
     resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
-    if (outVal_) {
-      // change original output value to mkldnn output value
-      output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
-    }
     convertWeightsFromPaddle();
     needResetBwd_ = true;
   }
@@ -264,7 +260,7 @@ protected:
    */
   virtual void resetOutGrad(MKLDNNMatrixPtr& out,
                             mkldnn::memory::primitive_desc pd) {
-    CHECK(outputIsOnlyMKLDNN()) << "only support mixed with other device yet";
+    CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet";
     mergeGrad_ = nullptr;
     out = MKLDNNMatrix::create(output_.grad, pd);
     if (outputMap_.size() <= 1) {
......
@@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
     cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
     if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
+      out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc());
       cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
       CHECK(cvtOutVal_) << "should not be empty";
     } else {
-      // CPU output share the same data of MKLDNN output
-      cpuOut->setData(out->getData());
       cpuOutVal_ = out;
     }
+    output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
+    return;
   }
+  output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
 }

 void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
......
@@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() {
 void MKLDNNTester::checkForward() {
   VLOG(MKLDNN_ALL) << "Check Forward";
   printTopDatas();
-  double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value,
-                               refLayer_->getOutputValue());
+  double delta =
+      compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue());
   EXPECT_LE(fabs(delta), eps_);
 }
......