diff --git a/paddle/gserver/activations/MKLDNNActivation.h b/paddle/gserver/activations/MKLDNNActivation.h
index bda9bbebe5600dbe26d11ff32058f7b2647b763e..86ffe387366409d81a91740cc8cea886e618f7e2 100644
--- a/paddle/gserver/activations/MKLDNNActivation.h
+++ b/paddle/gserver/activations/MKLDNNActivation.h
@@ -131,8 +131,9 @@ public:
     fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, eng));
     // use inplace for forward but save input value before submit
     inVal_ = val_;
-    if (act.grad) {
-      // only copy when need do backward
+    copyInVal_ = nullptr;
+    if (act.grad && algo == mkldnn::algorithm::eltwise_tanh) {
+      // tanh need save src input for backward
       inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc());
       copyInVal_ = std::make_shared<mkldnn::reorder>(*val_, *inVal_);
       CHECK(copyInVal_) << "should not be emptry";
diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp
index 2647cb600653b4f43322016afb231a55f4db5642..88b047c89bd40aba1afc456c22a2870c62989c1c 100644
--- a/paddle/gserver/layers/MKLDNNConvLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp
@@ -449,13 +449,14 @@ void MKLDNNConvLayer::resetOutGrad(
   cvtOutGrad_ = nullptr;
   if (!outputIsOnlyMKLDNN()) {
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
+    outMat->setData(cpuOut->getData());
     // same PrimitiveDesc with cpuInVal_
     CHECK(cpuOutVal_);
     cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
     if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) {
-      outMat->setData(cpuOut->getData());
       out = cpuOutGrad_;
     } else {
+      out = MKLDNNMatrix::create(nullptr, wgtPD->diff_dst_primitive_desc());
       cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
       CHECK(cvtOutGrad_);
     }
diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp
index 66b358bcea53f61ddcc15323704fa9f154fb2a73..afd092666bf8b8a3389b36aa1f0edb256a9968e6 100644
--- a/paddle/gserver/layers/MKLDNNFcLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp
@@ -232,6 +232,7 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
 void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
   // TODO(TJ): merge outgrad
   int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
+  output_.grad->setData(getOutput(device).grad->getData());
   // for MKLDNN device:
   // can not directly cast outputgrad to mkldnnmatrix,
   // since each layer can not write the inputgrad to mkldnn inputgrad.
diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h
index c4e4a6874e6fdb491c344c70dfea422dc0924cd9..d8555a833187ddf64b096135e920e5be2b3a8c2f 100644
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -141,18 +141,16 @@ public:
   }
 
   void backward(const UpdateCallback& callback) override {
-    /* Do derivation */ {
+    if (needResetBwd_) {
+      resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
+      needResetBwd_ = false;
+    }
+    {
       REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
       backwardActivation();
     }
-
     {
       REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
-      if (needResetBwd_) {
-        resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
-        needResetBwd_ = false;
-      }
-
       stream_->submit(pipelineBwd_);
     }
 
diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp
index 406181370faf90d29167b62173ce4c8af44d243e..1bfbbde4246a10eaf86693a6a2f237f390966db3 100644
--- a/paddle/gserver/tests/test_MKLDNN.cpp
+++ b/paddle/gserver/tests/test_MKLDNN.cpp
@@ -26,17 +26,26 @@ DECLARE_bool(thread_local_rand_use_global_seed);
 DECLARE_bool(use_gpu);
 DECLARE_bool(use_mkldnn);
 
-struct testFCDesc {
+#define RUN_MKLDNN_TEST(DNN_CONFIG, REF_CONFIG, DESC)          \
+  MKLDNNTester tester;                                         \
+  for (auto bs : {DESC.bs, 1}) {                               \
+    tester.run(DNN_CONFIG, REF_CONFIG, bs, DESC.ih, DESC.iw);  \
+  }
+
+#define RUN_MKLDNN_TEST_LAYER(DNN_CONFIG, REF_TYPE, DESC) \
+  TestConfig ref = DNN_CONFIG;                            \
+  ref.layerConfig.set_type(REF_TYPE);                     \
+  RUN_MKLDNN_TEST(DNN_CONFIG, ref, DESC)
+
+struct testFcDesc {
   int bs;
   int ic;
   int oc;
   int ih, iw;  // oh == ow == 1
 };
 
-void testFcLayer(const testFCDesc& pm) {
-  const std::string compareTypes[] = {"mkldnn_fc", "fc"};
-  TestConfig cfg;
-  cfg.layerConfig.set_type(compareTypes[0]);
+static void getMKLDNNFcConfig(TestConfig& cfg, const testFcDesc& pm) {
+  cfg.layerConfig.set_type("mkldnn_fc");
   cfg.layerConfig.set_size(pm.oc);
   cfg.inputDefs.push_back(
       {INPUT_DATA,
@@ -44,25 +53,25 @@ void testFcLayer(const testFCDesc& pm) {
        /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw),
        /* size of weight= */ size_t(pm.oc * pm.ic * pm.ih * pm.iw)});
   cfg.layerConfig.add_inputs();
+}
 
-  MKLDNNTester tester;
+void testFcLayer(const testFcDesc& pm) {
+  TestConfig dnnConfig;
+  getMKLDNNFcConfig(dnnConfig, pm);
   for (auto biasSize : {pm.oc, 0}) {
-    cfg.biasSize = biasSize;
-    TestConfig ref = cfg;
-    ref.layerConfig.set_type(compareTypes[1]);
-    for (auto bs : {pm.bs, 1}) {
-      tester.run(cfg, ref, bs, pm.ih, pm.iw);
-    }
+    dnnConfig.biasSize = biasSize;
+    RUN_MKLDNN_TEST_LAYER(dnnConfig, "fc", pm)
   }
 }
 
 TEST(MKLDNNLayer, FcLayer) {
-  testFcLayer({/*bs*/ 2, /*ic*/ 2, /*oc*/ 3, /*ih*/ 1, /*iw*/ 1});
-  testFcLayer({/*bs*/ 3, /*ic*/ 7, /*oc*/ 19, /*ih*/ 1, /*iw*/ 1});
-  testFcLayer({/*bs*/ 8, /*ic*/ 16, /*oc*/ 32, /*ih*/ 13, /*iw*/ 13});
-  testFcLayer({/*bs*/ 4, /*ic*/ 12, /*oc*/ 18, /*ih*/ 13, /*iw*/ 11});
-  testFcLayer({/*bs*/ 2, /*ic*/ 64, /*oc*/ 32, /*ih*/ 16, /*iw*/ 16});
-  testFcLayer({/*bs*/ 15, /*ic*/ 3, /*oc*/ 6, /*ih*/ 16, /*iw*/ 16});
+  /* bs, ic, oc, ih, iw */
+  testFcLayer({2, 2, 3, 1, 1});
+  testFcLayer({3, 7, 19, 1, 1});
+  testFcLayer({8, 16, 32, 13, 13});
+  testFcLayer({4, 12, 18, 13, 11});
+  testFcLayer({2, 64, 32, 16, 16});
+  testFcLayer({15, 3, 6, 16, 16});
 }
 
 struct testConvDesc {
@@ -69,19 +78,16 @@ struct testConvDesc {
   int bs, gp;
   int ic, ih, iw;
   int oc, oh, ow;
   int fh, fw;
   int ph, pw;
   int sh, sw;
   int dh, dw;
 };
 
-void testConvLayer(const testConvDesc& pm) {
-  const std::string compareTypes[] = {"mkldnn_conv", "exconv"};
-  TestConfig cfg;
-  cfg.layerConfig.set_type(compareTypes[0]);
+static void getMKLDNNConvConfig(TestConfig& cfg, const testConvDesc& pm) {
+  cfg.layerConfig.set_type("mkldnn_conv");
   cfg.layerConfig.set_num_filters(pm.oc);
   cfg.layerConfig.set_size(pm.oc * pm.oh * pm.ow);
-  // cfg.layerConfig.set_partial_sum(1); // TODO: check it
   cfg.layerConfig.set_shared_biases(true);
   cfg.inputDefs.push_back(
       {INPUT_DATA,
@@ -115,15 +121,14 @@ void testConvLayer(const testConvDesc& pm) {
   int oh = outputSize(pm.ih, fh, pm.ph, pm.sh, true);
   CHECK_EQ(ow, pm.ow) << "output size check failed";
   CHECK_EQ(oh, pm.oh) << "output size check failed";
+}
 
-  MKLDNNTester tester;
+void testConvLayer(const testConvDesc& pm) {
+  TestConfig dnnConfig;
+  getMKLDNNConvConfig(dnnConfig, pm);
   for (auto biasSize : {pm.oc, 0}) {
-    cfg.biasSize = biasSize;
-    TestConfig ref = cfg;
-    ref.layerConfig.set_type(compareTypes[1]);
-    for (auto bs : {pm.bs, 1}) {
-      tester.run(cfg, ref, bs, pm.ih, pm.iw);
-    }
+    dnnConfig.biasSize = biasSize;
+    RUN_MKLDNN_TEST_LAYER(dnnConfig, "exconv", pm)
   }
 }
 
@@ -143,7 +148,7 @@ TEST(MKLDNNLayer, ConvLayer) {
 }
 
 struct testPoolDesc {
-  int bs, ch;  // input channel and output channel are the same
+  int bs, ic;  // input channel and output channel are the same
   int ih, iw;
   int oh, ow;
   int fh, fw;
@@ -151,19 +156,18 @@ struct testPoolDesc {
   int sh, sw;
 };
 
-void testPoolLayer(const testPoolDesc& pm) {
-  const std::string compareTypes[] = {"mkldnn_pool", "pool"};
-  TestConfig cfg;
-  cfg.layerConfig.set_type(compareTypes[0]);
-  cfg.layerConfig.set_size(pm.ch * pm.oh * pm.ow);
+static void getMKLDNNPoolConfig(TestConfig& cfg, const testPoolDesc& pm) {
+  cfg.layerConfig.set_type("mkldnn_pool");
+  cfg.layerConfig.set_size(pm.ic * pm.oh * pm.ow);
   cfg.inputDefs.push_back(
       {INPUT_DATA,
        "layer_0",
-       /* size of input layer= */ size_t(pm.ch * pm.ih * pm.iw),
+       /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw),
        0});
   LayerInputConfig* input = cfg.layerConfig.add_inputs();
   PoolConfig* pool = input->mutable_pool_conf();
-  pool->set_channels(pm.ch);
+  pool->set_pool_type("avg-projection");
+  pool->set_channels(pm.ic);
   pool->set_img_size(pm.iw);
   pool->set_img_size_y(pm.ih);
   pool->set_output_x(pm.ow);
@@ -179,20 +183,21 @@ void testPoolLayer(const testPoolDesc& pm) {
   int ow = outputSize(pm.iw, pm.fw, pm.pw, pm.sw, false);
   CHECK_EQ(ow, pm.ow) << "output size check failed";
   CHECK_EQ(oh, pm.oh) << "output size check failed";
+}
 
-  MKLDNNTester tester;
+void testPoolLayer(const testPoolDesc& pm) {
+  TestConfig dnnConfig;
+  getMKLDNNPoolConfig(dnnConfig, pm);
+  LayerInputConfig* input = dnnConfig.layerConfig.mutable_inputs(0);
+  PoolConfig* pool = input->mutable_pool_conf();
   for (auto type : {"max-projection", "avg-projection"}) {
     pool->set_pool_type(type);
-    TestConfig ref = cfg;
-    ref.layerConfig.set_type(compareTypes[1]);
-    for (auto bs : {pm.bs, 1}) {
-      tester.run(cfg, ref, bs, pm.ih, pm.iw);
-    }
+    RUN_MKLDNN_TEST_LAYER(dnnConfig, "pool", pm)
   }
 }
 
 TEST(MKLDNNLayer, PoolLayer) {
-  /* bs, ch, ih, iw, oh, ow, fh, fw, ph, pw, sh, sw*/
+  /* bs, ch, ih, iw, oh, ow, fh, fw, ph, pw, sh, sw */
   testPoolLayer({2, 1, 4, 4, 2, 2, 3, 3, 0, 0, 2, 2});
   testPoolLayer({10, 8, 16, 16, 8, 8, 2, 2, 0, 0, 2, 2});
   testPoolLayer({4, 2, 5, 5, 3, 3, 3, 3, 1, 1, 2, 2});
@@ -204,44 +209,36 @@ TEST(MKLDNNLayer, PoolLayer) {
 }
 
 struct testActDesc {
-  int bs, ch;
-  int ih, iw;
+  int bs, ic, ih, iw;
 };
 
 static void getAddtoConfig(TestConfig& cfg, const testActDesc& pm) {
   cfg.biasSize = 0;
   cfg.layerConfig.set_type("addto");
-  cfg.layerConfig.set_size(pm.ch * pm.ih * pm.iw);
-  cfg.inputDefs.push_back(
-      {INPUT_DATA,
-       "layer_0",
-       /* size of input layer= */ size_t(pm.ch * pm.ih * pm.iw),
-       0});
+  size_t layerSize = pm.ic * pm.ih * pm.iw;
+  cfg.layerConfig.set_size(layerSize);
+  cfg.inputDefs.push_back({INPUT_DATA, "layer_0", layerSize, 0});
   cfg.layerConfig.add_inputs();
 }
 
-void testActivation(std::string& type, const testActDesc& pm) {
-  const std::string compareTypes[] = {type, type.erase(0, 7)};
+void testActivation(std::string& actType, const testActDesc& pm) {
+  // TODO(TJ): mkldnn_softmax not implemented, paddle do not have elu activation
+  if (actType == "mkldnn_softmax" || actType == "mkldnn_elu") {
+    return;
+  }
+  const std::string compareTypes[] = {actType, actType.erase(0, 7)};
   TestConfig cfg;
   getAddtoConfig(cfg, pm);
-
   TestConfig ref = cfg;
   cfg.layerConfig.set_active_type(compareTypes[0]);
   ref.layerConfig.set_active_type(compareTypes[1]);
-  MKLDNNTester tester;
-  for (auto bs : {pm.bs, 1}) {
-    tester.run(cfg, ref, bs, pm.ih, pm.iw);
-  }
+  RUN_MKLDNN_TEST(cfg, ref, pm)
 }
 
 TEST(MKLDNNActivation, Activations) {
   auto types = MKLDNNActivation::getAllRegisteredTypes();
-  // TODO(TJ): mkldnn_softmax not implemented, paddle do not have elu activation
-  std::set<std::string> excluded{"mkldnn_softmax", "mkldnn_elu"};
   for (auto type : types) {
-    if (excluded.count(type)) {
-      continue;
-    }
+    /* bs, c, h, w*/
     testActivation(type, {16, 64, 32, 32});
   }
 }
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 7c32eb0069f4075d72cd4c3654c83e3d5c98fb1c..0f57b81966647ca5c6f5cd2e5518d2d34942a549 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1565,6 +1565,10 @@ class LayerBase(object):
         self.config = g_config.model_config.layers.add()
         assert isinstance(self.config, LayerConfig)
 
+        use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0)))
+        mkldnn_acts = ['relu', 'tanh']
+        if use_mkldnn and active_type in mkldnn_acts:
+            active_type = "mkldnn_" + active_type
         self.config.name = name
         self.config.type = type
         self.config.active_type = active_type