Commit 1199aa68 authored by: T tensor-tang

fix bug: clear grad and always share data when output has cpu

and add activation in unit tests
Parent commit: dee7f813
...@@ -313,6 +313,7 @@ void MKLDNNConvLayer::resetOutValue( ...@@ -313,6 +313,7 @@ void MKLDNNConvLayer::resetOutValue(
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be empty"; CHECK(cvtOutVal_) << "should not be empty";
} else { } else {
cpuOut->setData(output_.value->getData());
cpuOutVal_ = out; cpuOutVal_ = out;
} }
// when output is cpu device, change the mkldnn output value and make them // when output is cpu device, change the mkldnn output value and make them
...@@ -456,17 +457,18 @@ void MKLDNNConvLayer::resetOutGrad( ...@@ -456,17 +457,18 @@ void MKLDNNConvLayer::resetOutGrad(
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc()); MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else { } else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
// always share the same grad data of CPU output
// then the activation can get the right grad from output_.grad
output_.grad->setData(cpuOut->getData());
// same PrimitiveDesc with cpuInVal_ // same PrimitiveDesc with cpuInVal_
CHECK(cpuOutVal_); CHECK(cpuOutVal_);
cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc()); cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
// create reorder if primitive desc does not match // create reorder if primitive desc does not match
if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) { if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc()); out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out); cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_); CHECK(cvtOutGrad_);
} else { } else {
// share the same data of CPU output
output_.grad->setData(cpuOut->getData());
out = cpuOutGrad_; out = cpuOutGrad_;
} }
} }
......
...@@ -46,6 +46,9 @@ protected: ...@@ -46,6 +46,9 @@ protected:
// backward also need reset after reset forward handle // backward also need reset after reset forward handle
bool needResetBwd_; bool needResetBwd_;
// is output only mkldnn
bool outputOnlyMKLDNN_;
// mkldnn engine, stream and primivtives // mkldnn engine, stream and primivtives
mkldnn::engine engine_; mkldnn::engine engine_;
std::shared_ptr<MKLDNNStream> stream_; std::shared_ptr<MKLDNNStream> stream_;
...@@ -141,6 +144,9 @@ public: ...@@ -141,6 +144,9 @@ public:
updateInputData(); updateInputData();
} }
if (!outputOnlyMKLDNN_) {
clearGrads();
}
stream_->submit(pipelineFwd_); stream_->submit(pipelineFwd_);
} }
...@@ -389,7 +395,8 @@ protected: ...@@ -389,7 +395,8 @@ protected:
CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE)
<< "Only support other device is CPU yet"; << "Only support other device is CPU yet";
} }
return outputOtherDevice_.size() == 0; outputOnlyMKLDNN_ = outputOtherDevice_.size() == 0;
return outputOnlyMKLDNN_;
} }
/** /**
...@@ -398,6 +405,16 @@ protected: ...@@ -398,6 +405,16 @@ protected:
void setDevice(int id) { deviceId_ = id; } void setDevice(int id) { deviceId_ = id; }
private: private:
/**
 * Zero out this layer's output gradients on every device.
 *
 * Invoked before submitting the forward pipeline when the output is
 * also consumed by a non-MKLDNN (CPU) device (see the
 * `if (!outputOnlyMKLDNN_) { clearGrads(); }` guard before
 * `stream_->submit(pipelineFwd_)`), so gradients accumulated in a
 * previous iteration cannot leak into the next backward pass through
 * the shared CPU-side gradient buffers.
 */
void clearGrads() {
// NOTE(review): assumes output_.grad is non-null when this runs — confirm
// callers never reach here in a forward-only (no-grad) configuration.
output_.grad->zeroMem();
// Also clear the gradient buffers mirrored on other (CPU) devices,
// since they share data with output_.grad after resetOutGrad().
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
outputOtherDevice_[i].grad->zeroMem();
}
}
/** /**
* Set deviceId of the params used in this layer. * Set deviceId of the params used in this layer.
*/ */
......
...@@ -146,6 +146,7 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) { ...@@ -146,6 +146,7 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be emptry"; CHECK(cvtOutVal_) << "should not be emptry";
} else { } else {
cpuOut->setData(output_.value->getData());
cpuOutVal_ = out; cpuOutVal_ = out;
} }
output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_); output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
...@@ -213,15 +214,16 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) { ...@@ -213,15 +214,16 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc()); MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else { } else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
// always share the same grad data of CPU output
// then the activation can get the right grad from output_.grad
output_.grad->setData(cpuOut->getData());
cpuOutGrad_ = MKLDNNMatrix::create( cpuOutGrad_ = MKLDNNMatrix::create(
cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_); cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) { if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc()); out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out); cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_) << "should not be emptry"; CHECK(cvtOutGrad_) << "should not be emptry";
} else { } else {
// share the same data of CPU output
output_.grad->setData(cpuOut->getData());
out = cpuOutGrad_; out = cpuOutGrad_;
} }
} }
......
...@@ -46,6 +46,7 @@ struct testFcDesc { ...@@ -46,6 +46,7 @@ struct testFcDesc {
static void getMKLDNNFcConfig(TestConfig& cfg, const testFcDesc& pm) { static void getMKLDNNFcConfig(TestConfig& cfg, const testFcDesc& pm) {
cfg.layerConfig.set_type("mkldnn_fc"); cfg.layerConfig.set_type("mkldnn_fc");
cfg.layerConfig.set_active_type("sigmoid");
cfg.layerConfig.set_size(pm.oc); cfg.layerConfig.set_size(pm.oc);
cfg.inputDefs.push_back( cfg.inputDefs.push_back(
{INPUT_DATA, {INPUT_DATA,
...@@ -86,6 +87,7 @@ struct testConvDesc { ...@@ -86,6 +87,7 @@ struct testConvDesc {
static void getMKLDNNConvConfig(TestConfig& cfg, const testConvDesc& pm) { static void getMKLDNNConvConfig(TestConfig& cfg, const testConvDesc& pm) {
cfg.layerConfig.set_type("mkldnn_conv"); cfg.layerConfig.set_type("mkldnn_conv");
cfg.layerConfig.set_active_type("relu");
cfg.layerConfig.set_num_filters(pm.oc); cfg.layerConfig.set_num_filters(pm.oc);
cfg.layerConfig.set_size(pm.oc * pm.oh * pm.ow); cfg.layerConfig.set_size(pm.oc * pm.oh * pm.ow);
cfg.layerConfig.set_shared_biases(true); cfg.layerConfig.set_shared_biases(true);
...@@ -158,6 +160,7 @@ struct testPoolDesc { ...@@ -158,6 +160,7 @@ struct testPoolDesc {
static void getMKLDNNPoolConfig(TestConfig& cfg, const testPoolDesc& pm) { static void getMKLDNNPoolConfig(TestConfig& cfg, const testPoolDesc& pm) {
cfg.layerConfig.set_type("mkldnn_pool"); cfg.layerConfig.set_type("mkldnn_pool");
cfg.layerConfig.set_active_type("relu");
cfg.layerConfig.set_size(pm.ic * pm.oh * pm.ow); cfg.layerConfig.set_size(pm.ic * pm.oh * pm.ow);
cfg.inputDefs.push_back( cfg.inputDefs.push_back(
{INPUT_DATA, {INPUT_DATA,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册