diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h
index 4002a3d0747a86ab7b495ffe52247521831b71b8..9813a556076bc2666869a85225feaf10f345217a 100644
--- a/paddle/gserver/layers/Layer.h
+++ b/paddle/gserver/layers/Layer.h
@@ -86,6 +86,7 @@ protected:
   /// Also used in 'use_mkldnn' case.
   std::vector<Argument> outputOtherDevice_;
   /// If there are several outputs, map them by each name.
+  /// MKLDNNLayer uses it only to merge the output grad.
   std::map<std::string, Argument*> outputMap_;
   /// Used to merge grad on different devices.
   MatrixPtr tmpGrad_;
@@ -325,6 +326,11 @@ public:
     outputMap_[name] = output;
   }
 
+  /**
+   * Get the output map size, if the layer has multiple outputs.
+   */
+  size_t getOutputMapSize() { return outputMap_.size(); }
+
   /**
    * Get the output based on layer's name.
    */
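
Note: getOutputMapSize() is what lets a producer layer detect that several
consumers have registered on it (each consumer calls setOutput() with its own
name and a private Argument slot). Below is a minimal standalone sketch of
that bookkeeping, using toy stand-ins for Paddle's Layer/Argument classes,
not the real ones:

    #include <cassert>
    #include <map>
    #include <string>

    struct Argument { float* grad = nullptr; };  // toy version of paddle::Argument

    struct Layer {  // toy producer with the same outputMap_ bookkeeping
      std::map<std::string, Argument*> outputMap_;
      void setOutput(const std::string& name, Argument* output) {
        outputMap_[name] = output;
      }
      size_t getOutputMapSize() { return outputMap_.size(); }
    };

    int main() {
      Layer producer;
      Argument slotA, slotB;  // like the tmpInArg_ of two consumer layers
      producer.setOutput("branch_a", &slotA);
      producer.setOutput("branch_b", &slotB);
      // more than one registered consumer means the producer
      // must merge the grads written into slotA and slotB
      assert(producer.getOutputMapSize() == 2);
      return 0;
    }
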
diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp
index 0d6742e909635c1097b4fe21bbb304f8a71af5cb..8b67a1ef4ffdd42559f8078873ed135751d56674 100644
--- a/paddle/gserver/layers/MKLDNNConvLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp
@@ -225,8 +225,6 @@ void MKLDNNConvLayer::resetFwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
-
   if (cvtInVal_) {
     pipeline.push_back(*cvtInVal_);
   }
@@ -245,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline(
 void MKLDNNConvLayer::resetInValue(
     std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) {
-  const MatrixPtr& inMat = inputLayers_[0]->getOutput().value;
+  const MatrixPtr& inMat = inputLayers_[0]->getOutputValue();
   in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc());
 
   // create buffer and reorder if input value do not match
@@ -310,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue(
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
     memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
     cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
-    if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
+    if (cpuOutVal_->getPrimitiveDesc() != pd->dst_primitive_desc()) {
+      out = MKLDNNMatrix::create(nullptr, pd->dst_primitive_desc());
       cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
-      CHECK(cvtOutVal_) << "should not be emptry";
+      CHECK(cvtOutVal_) << "should not be empty";
     } else {
-      // CPU output share the same data of MKLDNN output
-      cpuOut->setData(out->getData());
       cpuOutVal_ = out;
     }
+    // when the output is on the cpu device, change the mkldnn output value to
+    // share the same data as the cpu matrix. Then if the next layer uses
+    // inputLayers_[0]->getOutputValue() to fetch its input value, it will get
+    // the right data.
+    output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
+    return;
   }
+  output_.value = std::dynamic_pointer_cast<Matrix>(out);
 }
 
 void MKLDNNConvLayer::resetBwdWgtPD(
@@ -412,8 +415,6 @@ void MKLDNNConvLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
-
   if (cvtOutGrad_) {
     pipeline.push_back(*cvtOutGrad_);
   }
@@ -446,28 +447,27 @@ void MKLDNNConvLayer::resetBwdPipeline(
 void MKLDNNConvLayer::resetOutGrad(
     std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) {
-  const MatrixPtr& outMat = output_.grad;
-  out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc());
-  CHECK(outVal_ != nullptr &&
-        out->getPrimitiveDesc() == outVal_->getPrimitiveDesc())
-      << "primitive desc of out grad and value should be equal";
-
-  // TODO(TJ): merge outgrad
-  // create reorder if has output grad does not match
   cpuOutGrad_ = nullptr;
   cvtOutGrad_ = nullptr;
-  if (!outputIsOnlyMKLDNN()) {
+  CHECK(outVal_ != nullptr &&
+        outVal_->getPrimitiveDesc() == wgtPD->diff_dst_primitive_desc())
+      << "primitive desc of out grad and value should be equal";
+  if (outputIsOnlyMKLDNN()) {
+    MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
+  } else {
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
-    outMat->setData(cpuOut->getData());
     // same PrimitiveDesc with cpuInVal_
     CHECK(cpuOutVal_);
     cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
-    if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) {
-      out = cpuOutGrad_;
-    } else {
-      out = MKLDNNMatrix::create(nullptr, wgtPD->diff_dst_primitive_desc());
+    // create reorder if the primitive desc does not match
+    if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
+      out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
       cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
       CHECK(cvtOutGrad_);
+    } else {
+      // share the same data as the cpu output
+      output_.grad->setData(cpuOut->getData());
+      out = cpuOutGrad_;
     }
   }
 }
@@ -496,32 +496,30 @@ void MKLDNNConvLayer::resetWgtBiasGrad(
 void MKLDNNConvLayer::resetInGrad(
     std::shared_ptr<conv_bwdData::primitive_desc>& dataPD, MKLDNNMatrixPtr& in) {
+  in = nullptr;
+  cpuInGrad_ = nullptr;
+  cvtInGrad_ = nullptr;
   if (dataPD == nullptr) {
     return;
   }
-
-  // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
-  in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad,
-                            dataPD->diff_src_primitive_desc());
-  CHECK(nullptr != inVal_ &&
-        in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
-      << "primitive desc of input grad and value should be equal";
-
-  // create reorder if has output grad does not match
-  cpuInGrad_ = nullptr;
-  cvtInGrad_ = nullptr;
-  if (!inputIsOnlyMKLDNN()) {
+  if (inputIsOnlyMKLDNN()) {
+    MKLDNNLayer::resetInGrad(in, dataPD->diff_src_primitive_desc());
+    CHECK(nullptr != inVal_ &&
+          in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
+        << "primitive desc of input grad and value should be equal";
+  } else {
     const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE);
     // same PrimitiveDesc with cpuInVal_
     CHECK(cpuInVal_);
     cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc());
-    if (cpuInGrad_->getPrimitiveDesc() != in->getPrimitiveDesc()) {
-      const MatrixPtr& dnnIn = getInputGrad(0, MKLDNN_DEVICE);
-      in = MKLDNNMatrix::create(dnnIn, in->getPrimitiveDesc());
+    in = cpuInGrad_;
+    // create reorder if the PrimitiveDesc does not match
+    if (cpuInGrad_->getPrimitiveDesc() != dataPD->diff_src_primitive_desc()) {
+      in = MKLDNNMatrix::create(getInputGrad(0, MKLDNN_DEVICE),
                                 dataPD->diff_src_primitive_desc());
       cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_);
       CHECK(cvtInGrad_);
-    } else {
-      in = cpuInGrad_;
     }
   }
 }
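
Note: the new resetOutValue/resetOutGrad logic replaces the old setData()-style
sharing with direct pointer sharing: since MKLDNNMatrix derives (indirectly)
from Matrix, output_.value can simply point at the dnn matrix, and the next
layer's getOutputValue() then reads the converted buffer. A toy sketch of that
idea, with simplified types and no primitive descs:

    #include <cassert>
    #include <memory>
    #include <vector>

    struct Matrix { std::vector<float> data; };
    struct MKLDNNMatrix : Matrix { /* memory format details omitted */ };

    int main() {
      auto dnnOut = std::make_shared<MKLDNNMatrix>();
      dnnOut->data = {1.f, 2.f, 3.f};
      // what resetOutValue does when no reorder is needed:
      std::shared_ptr<Matrix> outputValue =
          std::dynamic_pointer_cast<Matrix>(dnnOut);
      // the consumer now sees the very same buffer, no copy involved
      assert(outputValue->data.data() == dnnOut->data.data());
      return 0;
    }
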
diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp
index e829456d6afd7cc844f752d4571cd9f90c73997f..cf19a155681f3a1ceb20af67245c8f2b8fa8fa73 100644
--- a/paddle/gserver/layers/MKLDNNFcLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp
@@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
 void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) {
   out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_);
   if (!outputIsOnlyMKLDNN()) {
-    // fc cpu output value do not need create convert
-    // just share point
+    // the fc cpu output value does not need a convert, just share the data
     getOutput(CPU_DEVICE).value->setData(out->getData());
   }
+  output_.value = std::dynamic_pointer_cast<Matrix>(out);
 }
 
 void MKLDNNFcLayer::resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
@@ -214,8 +214,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
-
   if (bias) {
     fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out));
   } else {
@@ -237,19 +235,14 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
 }
 
 void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
-  // TODO(TJ): merge outgrad
-  int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
-  output_.grad->setData(getOutput(device).grad->getData());
-  // for MKLDNN device:
-  // can not directly cast outputgrad to mkldnnmatrix,
-  // since each layer can not write the inputgrad to mkldnn inputgrad.
-  // So just create from matrix with outputvalue format.
-  // for CPU device:
-  // fc do not need to convert from cpu device since output is always nc format
-  // only need create from cpu device
   CHECK(outVal_);
-  out =
-      MKLDNNMatrix::create(getOutput(device).grad, outVal_->getPrimitiveDesc());
+  if (outputIsOnlyMKLDNN()) {
+    MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
+  } else {
+    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
+    output_.grad->setData(cpuOut->getData());
+    out = MKLDNNMatrix::create(cpuOut, outVal_->getPrimitiveDesc());
+  }
 }
 
 void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
@@ -267,13 +260,11 @@ void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
 
 void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) {
   in = nullptr;
-  const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
-  if (inGrad == nullptr) {
+  if (inputLayers_[0]->getOutput().grad == nullptr) {
     return;
   }
-  // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
   CHECK(inVal_);
-  in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc());
+  MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
 }
 
 void MKLDNNFcLayer::resetBwdWgtPD(
@@ -314,7 +305,6 @@ void MKLDNNFcLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
   CHECK(inVal_);
   if (bias) {
     bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias));
diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h
index c09fd89462ef4fdaeaae3e122f96b0cc6ce373ea..5f9923da769781287e39a3aaaf92248dfe09f225 100644
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -65,6 +65,17 @@ protected:
   MKLDNNMatrixPtr biasVal_;
   MKLDNNMatrixPtr biasGrad_;
 
+  // merge grad primitive
+  std::shared_ptr<mkldnn::primitive> mergeGrad_;
+  std::vector<mkldnn::primitive> pipelineMergeGrad_;
+  // tmp input argument to save input grad, only used to merge grad
+  Argument tmpInArg_;
+  // since mkldnn sum does not support different formats
+  // (refer to https://github.com/01org/mkl-dnn/issues/134),
+  // we need to create the reorder manually and save the tmp MKLDNNMatrix
+  MKLDNNMatrixPtr tmpOutGrad_;
+  std::shared_ptr<mkldnn::reorder> tmpCvt_;
+
 public:
   explicit MKLDNNLayer(const LayerConfig& config)
       : Layer(config),
@@ -99,6 +110,7 @@ public:
     if (!Layer::init(layerMap, parameterMap)) {
       return false;
     }
+    setOutputMap();
     checkCPUOutputsNumber();
 
     stream_.reset(new MKLDNNStream());
@@ -118,12 +130,9 @@
       VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
       // reset when input total sizes changed, not only the batchsize
       inputElemenCnt_ = elemenCnt;
+      pipelineFwd_.clear();
       reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
       resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
-      if (outVal_) {
-        // change original output value to mkldnn output value
-        output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
-      }
       convertWeightsFromPaddle();
       needResetBwd_ = true;
     }
@@ -144,9 +153,18 @@
   void backward(const UpdateCallback& callback) override {
     if (needResetBwd_) {
       VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward";
+      pipelineBwd_.clear();
+      pipelineMergeGrad_.clear();
+      mergeGrad_ = nullptr;
       resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
       needResetBwd_ = false;
     }
+
+    // the merge grad must run before the backward activation
+    if (mergeGrad_) {
+      REGISTER_TIMER_INFO("MergeBpGrad", getName().c_str());
+      stream_->submit(pipelineMergeGrad_);
+    }
     {
       REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
       backwardActivation();
@@ -247,6 +265,76 @@ protected:
     }
   }
 
+  /**
+   * reset the output grad matrix from the primitive desc,
+   * and reset the merge grad primitive if needed.
+   * note: when this layer has several outputs,
+   * it can not be mixed with a cpu device,
+   * since it can not get a memory desc from the cpu device.
+   */
+  virtual void resetOutGrad(MKLDNNMatrixPtr& out,
+                            mkldnn::memory::primitive_desc pd) {
+    CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet";
+    mergeGrad_ = nullptr;
+    pipelineMergeGrad_.clear();
+    out = MKLDNNMatrix::create(output_.grad, pd);
+    if (outputMap_.size() <= 1) {
+      return;
+    }
+    std::vector<double> scales(outputMap_.size(), 1.0);
+    std::vector<mkldnn::memory::primitive_desc> srcPDs;
+    std::vector<mkldnn::primitive::at> srcs;
+    for (auto it = outputMap_.begin(); it != outputMap_.end(); ++it) {
+      MKLDNNMatrixPtr src =
+          std::dynamic_pointer_cast<MKLDNNMatrix>(it->second->grad);
+      VLOG(MKLDNN_BASE) << getName() << " has output grad " << it->first;
+      CHECK(src) << "should be MKLDNNMatrix";
+      auto srcDims = src->getDims();
+      auto dstDims = out->getDims();
+      CHECK_EQ(srcDims.size(), dstDims.size());
+      for (size_t i = 0; i < srcDims.size(); ++i) {
+        CHECK_EQ(srcDims[i], dstDims[i]);
+      }
+      srcPDs.push_back(src->getPrimitiveDesc());
+      srcs.push_back(*src);
+    }
+
+    // TODO(TJ): remove me when mkldnn sum supports different formats
+    for (size_t i = 1; i < srcPDs.size(); ++i) {
+      CHECK(srcPDs[0] == srcPDs[i]);
+    }
+    tmpOutGrad_ = nullptr;
+    tmpCvt_ = nullptr;
+    if (out->getPrimitiveDesc() != srcPDs[0]) {
+      tmpOutGrad_ = MKLDNNMatrix::create(nullptr, srcPDs[0]);
+      tmpCvt_ = MKLDNNMatrix::createReorder(tmpOutGrad_, out);
+      CHECK(tmpCvt_);
+      pipelineMergeGrad_.push_back(*tmpCvt_);
+    } else {
+      tmpOutGrad_ = out;
+    }
+
+    auto sumPD = mkldnn::sum::primitive_desc(
+        tmpOutGrad_->getMemoryDesc(), scales, srcPDs);
+    mergeGrad_.reset(new mkldnn::sum(sumPD, srcs, *tmpOutGrad_));
+    pipelineMergeGrad_.insert(pipelineMergeGrad_.begin(), *mergeGrad_);
+  }
+
+  /**
+   * reset the input grad from the primitive desc.
+   * this function is available when the input is only mkldnn,
+   * or when the input does not care about the cpu device.
+   */
+  virtual void resetInGrad(MKLDNNMatrixPtr& in,
+                           mkldnn::memory::primitive_desc pd) {
+    LayerPtr& input = inputLayers_[0];
+    const MatrixPtr& grad =
+        input->getOutputMapSize() > 1 ? nullptr : input->getOutput().grad;
+    in = MKLDNNMatrix::create(grad, pd);
+    Argument& arg = input->getOutput(this->getName());
+    arg.grad = std::dynamic_pointer_cast<Matrix>(in);
+  }
+
   /**
    * print info about sizes
    */
@@ -334,6 +422,16 @@ private:
     }
   }
 
+  /**
+   * Register this layer in the output map of its previous layers.
+   */
+  void setOutputMap() {
+    outputMap_.clear();
+    for (size_t i = 0; i < inputLayers_.size(); ++i) {
+      inputLayers_[i]->setOutput(getName(), &tmpInArg_);
+    }
+  }
+
   /**
    * Check the cpu device number of outputOtherDevice_.
    * should have only one at most.
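
Note: resetOutGrad() above is the heart of this change. When outputMap_ holds
more than one entry, the layer's true output grad is the elementwise sum of
every consumer's grad, which mkldnn::sum computes with all scales set to 1.0
(plus a manual reorder when the destination format differs, since mkl-dnn's
sum cannot mix formats yet). A plain-C++ toy of the same arithmetic, not the
real mkldnn primitives:

    #include <cassert>
    #include <vector>

    // merge one grad buffer per registered consumer, scale 1.0 each
    std::vector<float> mergeGrads(const std::vector<std::vector<float>>& srcs) {
      std::vector<float> dst(srcs[0].size(), 0.f);
      for (const auto& src : srcs) {
        assert(src.size() == dst.size());  // mirrors the CHECK_EQ on dims
        for (size_t i = 0; i < src.size(); ++i) {
          dst[i] += 1.0f * src[i];
        }
      }
      return dst;  // the real code may still reorder this into output_.grad
    }

    int main() {
      std::vector<float> merged = mergeGrads({{1.f, 2.f}, {10.f, 20.f}});
      assert(merged[0] == 11.f && merged[1] == 22.f);
      return 0;
    }
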
diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp
index b62dfb7c54258a593aa50d5b30096423f375c69d..5606aae80ce8e9a1e571d3c057c471b26a59d032 100644
--- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp
@@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
     cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
     if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
+      out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc());
       cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
       CHECK(cvtOutVal_) << "should not be emptry";
     } else {
-      // CPU output share the same data of MKLDNN output
-      cpuOut->setData(out->getData());
       cpuOutVal_ = out;
     }
+    output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
+    return;
   }
+  output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
 }
 
 void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
@@ -187,7 +189,6 @@ void MKLDNNPoolLayer::resetFwdPipeline(
     std::shared_ptr<pool_fwd::primitive_desc>& pd,
     MKLDNNMatrixPtr& in,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
   fwd_ = workspace_
              ? std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out, *workspace_))
              : std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out));
@@ -205,17 +206,17 @@ void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
   resetInGrad(in);
 }
 void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
-  CHECK(outVal_) << "Should have output value";
-  out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
-
-  // create reorder if output value has cpu device and pd do not match
   cpuOutGrad_ = nullptr;
   cvtOutGrad_ = nullptr;
-  if (!outputIsOnlyMKLDNN()) {
+  CHECK(outVal_);
+  if (outputIsOnlyMKLDNN()) {
+    MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
+  } else {
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
     cpuOutGrad_ = MKLDNNMatrix::create(
         cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
-    if (cpuOutGrad_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
+    if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
+      out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
       cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
       CHECK(cvtOutGrad_) << "should not be emptry";
     } else {
@@ -228,12 +229,11 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
 
 void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) {
   in = nullptr;
-  const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
-  if (inGrad == nullptr) {
+  if (inputLayers_[0]->getOutput().grad == nullptr) {
     return;
   }
   CHECK(inVal_);
-  in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc());
+  MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
 }
 
 void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
@@ -261,7 +261,6 @@ void MKLDNNPoolLayer::resetBwdPipeline(
     std::shared_ptr<pool_bwd::primitive_desc>& pd,
     MKLDNNMatrixPtr& in,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
   if (cvtOutGrad_) {
     pipeline.push_back(*cvtOutGrad_);
   }
diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp
index f59618be9d09d146be52fb51cae84f4d24c15ef1..eaebdd671cfa1b37e5efe149588ca23fdc402a8e 100644
--- a/paddle/gserver/tests/MKLDNNTester.cpp
+++ b/paddle/gserver/tests/MKLDNNTester.cpp
@@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() {
 void MKLDNNTester::checkForward() {
   VLOG(MKLDNN_ALL) << "Check Forward";
   printTopDatas();
-  double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value,
-                               refLayer_->getOutputValue());
+  double delta =
+      compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue());
   EXPECT_LE(fabs(delta), eps_);
 }
diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt
index 066837ca959e46dbe3b39c661aa1bab11cbf2734..5ebbb99c94bce45d295ae0bf585f2cf864bfc4d4 100644
--- a/paddle/trainer/tests/CMakeLists.txt
+++ b/paddle/trainer/tests/CMakeLists.txt
@@ -39,15 +39,18 @@ add_test(NAME test_CompareTwoNets
 ################ test_CompareMKLDNNandCPU ######################
 if(WITH_MKLDNN)
-  add_unittest_without_exec(test_CompareMKLDNNandCPU
-    test_CompareTwoNets.cpp)
-  add_test(NAME test_CompareMKLDNNandCPU
-    COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
-        ${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU
-        --config_file_a=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_a=True
-        --config_file_b=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_b=False
-        --use_gpu=False
-    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
+  macro(gen_command VAR_NAME CONFIG_FILE)
+    set(${VAR_NAME} "${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh" "-d" "${PADDLE_SOURCE_DIR}/python/"
+                    "${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU --use_gpu=False"
+                    "--config_file_a=trainer/tests/${CONFIG_FILE} --use_mkldnn_a=True"
+                    "--config_file_b=trainer/tests/${CONFIG_FILE} --use_mkldnn_b=False"
+                    "WORKING_DIRECTORY" "${PADDLE_SOURCE_DIR}/paddle/")
+  endmacro()
+  add_unittest_without_exec(test_CompareMKLDNNandCPU test_CompareTwoNets.cpp)
+  gen_command(compare_simple_net "sample_trainer_config_simple_net.conf")
+  gen_command(compare_branch_net "sample_trainer_config_branch_net.conf")
+  add_test(NAME test_CompareMKLDNNandCPU_simple_net COMMAND ${compare_simple_net})
+  add_test(NAME test_CompareMKLDNNandCPU_branch_net COMMAND ${compare_branch_net})
 endif()
 
 ############### test_CompareTwoOpts ###################
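
Note: gen_command expands to the full COMMAND argument list of add_test,
including the trailing "WORKING_DIRECTORY" pair, so the two generated tests
differ only in the config file they load; both run the same
test_CompareMKLDNNandCPU binary with use_mkldnn enabled on one side and
disabled on the other. After a WITH_MKLDNN build, both comparisons should be
selectable with something like "ctest -R test_CompareMKLDNNandCPU"
(illustrative invocation).
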
diff --git a/paddle/trainer/tests/sample_trainer_config_branch_net.conf b/paddle/trainer/tests/sample_trainer_config_branch_net.conf
new file mode 100644
index 0000000000000000000000000000000000000000..c2594bc13c250a877a7b8a77e11405671c4d8907
--- /dev/null
+++ b/paddle/trainer/tests/sample_trainer_config_branch_net.conf
@@ -0,0 +1,103 @@
+# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+################################### Data Configuration ###################################
+TrainData(ProtoData(files = "trainer/tests/mnist.list"))
+################################### Algorithm Configuration ###################################
+settings(batch_size = 256,
+         learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
+################################### Network Configuration ###################################
+data = data_layer(name ="input", size=784)
+
+tmp = img_conv_layer(input=data,
+                     num_channels=1,
+                     filter_size=3,
+                     num_filters=32,
+                     padding=1,
+                     shared_biases=True,
+                     act=ReluActivation())
+
+a1 = img_conv_layer(input=tmp,
+                    filter_size=1,
+                    num_filters=32,
+                    padding=0,
+                    shared_biases=True,
+                    act=ReluActivation())
+
+a2 = img_conv_layer(input=tmp,
+                    filter_size=3,
+                    num_filters=32,
+                    padding=1,
+                    shared_biases=True,
+                    act=ReluActivation())
+
+tmp = concat_layer(input=[a1, a2])
+
+tmp = img_pool_layer(input=tmp,
+                     num_channels=64,
+                     pool_size=3,
+                     stride=2,
+                     padding=1,
+                     pool_type=AvgPooling())
+
+b1 = img_conv_layer(input=tmp,
+                    filter_size=3,
+                    num_filters=64,
+                    padding=1,
+                    shared_biases=True,
+                    act=ReluActivation())
+
+b1 = img_pool_layer(input=b1,
+                    pool_size=3,
+                    stride=1,
+                    padding=1,
+                    pool_type=MaxPooling())
+
+b2 = img_conv_layer(input=tmp,
+                    filter_size=5,
+                    num_filters=64,
+                    padding=2,
+                    shared_biases=True,
+                    act=ReluActivation())
+
+b2 = img_pool_layer(input=b2,
+                    pool_size=5,
+                    stride=1,
+                    padding=2,
+                    pool_type=MaxPooling())
+
+tmp = addto_layer(input=[b1, b2],
+                  act=ReluActivation(),
+                  bias_attr=False)
+
+tmp = img_pool_layer(input=tmp,
+                     pool_size=3,
+                     stride=2,
+                     padding=1,
+                     pool_type=MaxPooling())
+
+tmp = fc_layer(input=tmp, size=64,
+               bias_attr=False,
+               act=TanhActivation())
+
+output = fc_layer(input=tmp, size=10,
+                  bias_attr=True,
+                  act=SoftmaxActivation())
+
+lbl = data_layer(name ="label", size=10)
+
+cost = classification_cost(input=output, label=lbl)
+outputs(cost)
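
Note: this branch-net config is what exercises the new merge-grad path: tmp
feeds both a1 and a2 (joined by concat_layer), and the pooled tensor feeds
both b1 and b2 (joined by addto_layer), so the layers ahead of each fork have
more than one registered consumer in outputMap_, while the trailing fc layers
keep the single-consumer path covered as well.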