提交 1199aa68 编写于 作者: T tensor-tang

fix bug: clear grad and always share data when output has cpu

and add activation in unit tests
上级 dee7f813
......@@ -313,6 +313,7 @@ void MKLDNNConvLayer::resetOutValue(
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be empty";
} else {
cpuOut->setData(output_.value->getData());
cpuOutVal_ = out;
}
// when output is cpu device, change the mkldnn output value and make them
......@@ -456,17 +457,18 @@ void MKLDNNConvLayer::resetOutGrad(
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
// always share the same grad data of CPU output
// then the activation can get the right grad from output_.grad
output_.grad->setData(cpuOut->getData());
// same PrimitiveDesc with cpuInVal_
CHECK(cpuOutVal_);
cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
// create reorder if primitive desc does not match
if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_);
} else {
// share the same data of CPU output
output_.grad->setData(cpuOut->getData());
out = cpuOutGrad_;
}
}
......
......@@ -46,6 +46,9 @@ protected:
// backward also need reset after reset forward handle
bool needResetBwd_;
// is output only mkldnn
bool outputOnlyMKLDNN_;
// mkldnn engine, stream and primivtives
mkldnn::engine engine_;
std::shared_ptr<MKLDNNStream> stream_;
......@@ -141,6 +144,9 @@ public:
updateInputData();
}
if (!outputOnlyMKLDNN_) {
clearGrads();
}
stream_->submit(pipelineFwd_);
}
......@@ -389,7 +395,8 @@ protected:
CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE)
<< "Only support other device is CPU yet";
}
return outputOtherDevice_.size() == 0;
outputOnlyMKLDNN_ = outputOtherDevice_.size() == 0;
return outputOnlyMKLDNN_;
}
/**
......@@ -398,6 +405,16 @@ protected:
void setDevice(int id) { deviceId_ = id; }
private:
/**
* clear all grad
*/
void clearGrads() {
output_.grad->zeroMem();
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
outputOtherDevice_[i].grad->zeroMem();
}
}
/**
* Set deviceId of the params used in this layer.
*/
......
......@@ -146,6 +146,7 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be emptry";
} else {
cpuOut->setData(output_.value->getData());
cpuOutVal_ = out;
}
output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
......@@ -213,15 +214,16 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
// always share the same grad data of CPU output
// then the activation can get the right grad from output_.grad
output_.grad->setData(cpuOut->getData());
cpuOutGrad_ = MKLDNNMatrix::create(
cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_) << "should not be emptry";
} else {
// share the same data of CPU output
output_.grad->setData(cpuOut->getData());
out = cpuOutGrad_;
}
}
......
......@@ -46,6 +46,7 @@ struct testFcDesc {
static void getMKLDNNFcConfig(TestConfig& cfg, const testFcDesc& pm) {
cfg.layerConfig.set_type("mkldnn_fc");
cfg.layerConfig.set_active_type("sigmoid");
cfg.layerConfig.set_size(pm.oc);
cfg.inputDefs.push_back(
{INPUT_DATA,
......@@ -86,6 +87,7 @@ struct testConvDesc {
static void getMKLDNNConvConfig(TestConfig& cfg, const testConvDesc& pm) {
cfg.layerConfig.set_type("mkldnn_conv");
cfg.layerConfig.set_active_type("relu");
cfg.layerConfig.set_num_filters(pm.oc);
cfg.layerConfig.set_size(pm.oc * pm.oh * pm.ow);
cfg.layerConfig.set_shared_biases(true);
......@@ -158,6 +160,7 @@ struct testPoolDesc {
static void getMKLDNNPoolConfig(TestConfig& cfg, const testPoolDesc& pm) {
cfg.layerConfig.set_type("mkldnn_pool");
cfg.layerConfig.set_active_type("relu");
cfg.layerConfig.set_size(pm.ic * pm.oh * pm.ow);
cfg.inputDefs.push_back(
{INPUT_DATA,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册