diff --git a/paddle/gserver/layers/MKLDNNBase.h b/paddle/gserver/layers/MKLDNNBase.h index 4c0234e7b3a91053596c32cea581fa5d1e26b9d5..af02a37cad668708f77ecf423549a8ec993e54fb 100644 --- a/paddle/gserver/layers/MKLDNNBase.h +++ b/paddle/gserver/layers/MKLDNNBase.h @@ -21,8 +21,8 @@ namespace paddle { typedef enum { MKLDNN_BASE = 1, // basical info of MKLDNN MKLDNN_TESTS = 1, // gtest info of MKLDNN - MKLDNN_SIZES = 2, // size info of MKLDNN - MKLDNN_FMTS = 3, // format info of MKLDNN + MKLDNN_FMTS = 2, // format info of MKLDNN + MKLDNN_SIZES = 3, // size info of MKLDNN MKLDNN_ALL = 4, // show all info of MKLDNN } MKLDNN_LOG_LEVEL; diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/gserver/layers/MKLDNNLayer.cpp index 91f0ff5bd326e848c49c4e57ccb9e02e2bdc0626..f4968c4af3773a2c55f518e056df128b82e31ff6 100644 --- a/paddle/gserver/layers/MKLDNNLayer.cpp +++ b/paddle/gserver/layers/MKLDNNLayer.cpp @@ -105,6 +105,10 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) { // external output grad is not necessary // since output may be mkldnn internal buffer or merge them directly. CHECK(outGrad_) << "internal output grad is necessary"; + if (extOutGrad_) { + CHECK_EQ(extOutGrad_->getData(), output_.grad->getData()) + << "the external buffer should share the same data with output_.grad"; + } if (cvtOutGrad_) { pipelineBwd_.insert(pipelineBwd_.begin(), *cvtOutGrad_); } @@ -293,7 +297,6 @@ void MKLDNNLayer::resetMergeGrad(MKLDNNMatrixPtr& out) { for (auto it = outputMap_.begin(); it != outputMap_.end(); ++it) { MKLDNNMatrixPtr src = std::dynamic_pointer_cast(it->second->grad); - VLOG(MKLDNN_BASE) << getName() << " has output grad " << it->first; CHECK(src) << "should be MKLDNNMatrix"; auto srcDims = src->getDims(); auto dstDims = out->getDims(); @@ -301,6 +304,8 @@ void MKLDNNLayer::resetMergeGrad(MKLDNNMatrixPtr& out) { for (size_t i = 0; i < srcDims.size(); ++i) { CHECK_EQ(srcDims[i], dstDims[i]); } + VLOG(MKLDNN_BASE) << getName() << " has output grad " << it->first + << ", format " << src->getFormat(); srcPDs.push_back(src->getPrimitiveDesc()); srcs.push_back(*src); } diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index faad434526fd76d1ddb65984c43ac529ea071612..656b5ee2d77eb091f44f4db5f30ba808b30fdde5 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -58,13 +58,13 @@ protected: std::vector pipelineFwd_; std::vector pipelineBwd_; - /// value and grad are seperate as internal and external buffers. + /// value and grad are seperated as internal and external buffers. /// each MKLDNNLayer must init or reset internal buffer at least, /// and the external buffer format is always nchw of nc(when h==w==1), /// which is the same format as paddle. - /// When mixed with cpu device, the output_.value and output_.grad - /// always save the external data. - /// When all layers are all mkldnn layers, they could be internal data. + /// The output_.value and output_.grad always save the external data, + /// when mixed with cpu device. + /// When all layers are mkldnn layers, they could save internal data. /// below MKLDNNMatrix buffers are all internal buffers MKLDNNMatrixPtr inVal_; MKLDNNMatrixPtr inGrad_; diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index 3bf6a9e176cc1235aa5ddefcedd4253e6afc1342..0a19fe23336ea943cb8a572dc40f8c0fbbd7236a 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -97,7 +97,7 @@ void MKLDNNTester::randomWgtDatas() { parameters_[REF][i]->randomize(); dnnValue->copyFrom(*refValue); - VLOG(lvl_) << "Random weight data " << parameters_[DNN][i]->getName(); + VLOG(MKLDNN_TESTS) << "Random weight " << parameters_[DNN][i]->getName(); printVector(dnnValue); } } @@ -109,7 +109,7 @@ void MKLDNNTester::randomBotDatas() { dataLayers_[REF][i]->getOutputValue()->randomizeUniform(); dataLayers_[DNN][i]->getOutputValue()->copyFrom( *(dataLayers_[REF][i]->getOutputValue())); - VLOG(lvl_) << "Input " << i << " data:"; + VLOG(MKLDNN_TESTS) << "Random Foward, InputValue " << i; printMatrix(dataLayers_[REF][i]->getOutputValue()); } } @@ -118,12 +118,12 @@ void MKLDNNTester::randomTopDiffs() { refLayer_->getOutputGrad()->randomizeUniform(); dnnLayer_->getOutput(CPU_DEVICE) .grad->copyFrom(*(refLayer_->getOutputGrad())); - VLOG(lvl_) << "Random Backward Input, TopDiff: "; + VLOG(MKLDNN_TESTS) << "Random Backward, OutputGrad"; printMatrix(refLayer_->getOutputGrad()); } void MKLDNNTester::checkForward() { - VLOG(MKLDNN_ALL) << "Check Forward"; + VLOG(MKLDNN_TESTS) << "Check Forward"; printTopDatas(); double delta = compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue()); @@ -131,15 +131,15 @@ void MKLDNNTester::checkForward() { } void MKLDNNTester::checkBackwardData() { - VLOG(MKLDNN_ALL) << "Check Backward Data"; + VLOG(MKLDNN_TESTS) << "Check Backward Data"; // TODO(TJ): uncomment me when batch norm ready // const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad(); const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad(); - VLOG(lvl_) << "Mkldnn Backward Output BotDiff " << i; + VLOG(MKLDNN_ALL) << "MKLDNN Backward Result: InputGrad " << i; printMatrix(dnnDiff); - VLOG(lvl_) << "Reference Backward Output BotDiff " << i; + VLOG(MKLDNN_ALL) << "Reference Backward Result: InputGrad " << i; printMatrix(refDiff); double delta = compareMatrix(dnnDiff, refDiff); @@ -153,7 +153,7 @@ void MKLDNNTester::checkBackwardData() { } void MKLDNNTester::checkBackwardWgts() { - VLOG(MKLDNN_ALL) << "Check Backward Weight"; + VLOG(MKLDNN_TESTS) << "Check Backward Weight"; CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); vector dnnWgts; // used to temply save mkldnn weights saveWgt(parameters_[DNN], dnnWgts); @@ -165,9 +165,11 @@ void MKLDNNTester::checkBackwardWgts() { for (size_t i = 0; i < parameters_[DNN].size(); ++i) { const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); - VLOG(lvl_) << "Mkldnn Output weight " << parameters_[DNN][i]->getName(); + VLOG(MKLDNN_ALL) << "MKLDNN Result: weight value" + << parameters_[DNN][i]->getName(); printVector(dnn); - VLOG(lvl_) << "Reference Output weight " << parameters_[REF][i]->getName(); + VLOG(MKLDNN_ALL) << "Reference Result: weight value " + << parameters_[REF][i]->getName(); printVector(ref); double delta = compareVector(dnn, ref); @@ -240,7 +242,8 @@ void MKLDNNTester::printTopDatas() { } for (int n = 0; n < NUM; ++n) { - VLOG(lvl_) << testLayers_[n]->getType() << " forward output TopData: "; + VLOG(MKLDNN_ALL) << testLayers_[n]->getType() + << " Forward Result: OutputValue"; printMatrix(testLayers_[n]->getOutputValue()); } } @@ -252,7 +255,7 @@ void MKLDNNTester::printMatrix(const MatrixPtr& m) { std::ostringstream ostr; m->print(ostr); - VLOG(lvl_) << std::endl << ostr.str(); + VLOG(MKLDNN_ALL) << std::endl << ostr.str(); } void MKLDNNTester::printVector(const VectorPtr& v) { @@ -262,7 +265,7 @@ void MKLDNNTester::printVector(const VectorPtr& v) { std::ostringstream ostr; v->print(ostr, v->getSize()); - VLOG(lvl_) << std::endl << ostr.str(); + VLOG(MKLDNN_ALL) << std::endl << ostr.str(); } double MKLDNNTester::getDelta(const real* d1, @@ -314,7 +317,7 @@ void MKLDNNTester::runOnce() { UpdateCallback updateCallback = [](Parameter* para) { auto& grad = para->getBuf(PARAMETER_GRADIENT); auto& value = para->getBuf(PARAMETER_VALUE); - real lr = 1e-3; + real lr = 1e-2; value->add(*grad, lr); grad->zeroMem(); }; @@ -340,10 +343,9 @@ void MKLDNNTester::run(const TestConfig& dnn, size_t batchSize, size_t inputImgH, size_t inputImgW, + bool printDetails, size_t iter, - float epsilon, - bool log, - int level) { + float epsilon) { CHECK(dnn.layerConfig.type().compare(0, 7, "mkldnn_") == 0 || dnn.layerConfig.active_type().compare(0, 7, "mkldnn_") == 0) << "should be MKLDNN layer or MKLDNN activation"; @@ -359,10 +361,9 @@ void MKLDNNTester::run(const TestConfig& dnn, ih_ = inputImgH; iw_ = inputImgW; + log_ = printDetails; iter_ = iter; eps_ = epsilon; - log_ = log; - lvl_ = level; // Firstly test mkldnn init from PARAM_FORMAT_ORIGINAL weight reset(dnn, ref, batchSize); @@ -531,9 +532,11 @@ void MKLDNNTester::getOutResult(const std::string& configPath, void MKLDNNTester::compareResult(DataOut& ref, DataOut& dnn, float eps) { CHECK_EQ(ref.outValues.size(), dnn.outValues.size()); CHECK_EQ(ref.paraValues.size(), dnn.paraValues.size()); + VLOG(MKLDNN_TESTS) << "compare value size: " << ref.outValues.size(); for (size_t i = 0; i < ref.outValues.size(); i++) { EXPECT_LE(fabs(compareMatrix(ref.outValues[i], dnn.outValues[i])), eps); } + VLOG(MKLDNN_TESTS) << "compare param size: " << ref.outValues.size(); for (size_t i = 0; i < ref.paraValues.size(); i++) { EXPECT_LE(fabs(compareVector(ref.paraValues[i], dnn.paraValues[i])), eps); } @@ -544,9 +547,10 @@ void MKLDNNTester::runBranchesTest(const std::string& configPath, float eps) { DataIn in; initArgument(in, configPath, iter); - DataOut outCpu, outDnn; + VLOG(MKLDNN_TESTS) << "runing cpu network"; getOutResult(configPath, in, outCpu, false, iter); + VLOG(MKLDNN_TESTS) << "runing mkldnn network"; getOutResult(configPath, in, outDnn, true, iter); compareResult(outCpu, outDnn, eps); diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h index 51abfcb67e2ec35fe1b0179e742a7d18f08f8a2c..c385d1c72717d120211f167b5c5eb9a557da3714 100644 --- a/paddle/gserver/tests/MKLDNNTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -58,8 +58,6 @@ protected: size_t iter_; /// whether to print out the details bool log_; - /// vlog level to print the matrix details datas - int lvl_; /// epsilon float eps_; /// input image size, default 1 @@ -70,7 +68,6 @@ public: iter_ = iter; eps_ = epsilon; log_ = false; - lvl_ = MKLDNN_ALL; } ~MKLDNNTester() {} @@ -81,10 +78,9 @@ public: size_t batchSize, size_t inputImgH = 1, size_t inputImgW = 1, + bool printDetails = false, size_t iter = 3, - float epsilon = 1e-4, - bool log = false, - int level = MKLDNN_ALL); + float epsilon = 1e-4); static void runBranchesTest(const std::string& configPath, size_t iter = 3, float eps = 1e-4); diff --git a/paddle/trainer/tests/sample_trainer_config_branch_net.conf b/paddle/trainer/tests/sample_trainer_config_branch_net.conf index c2594bc13c250a877a7b8a77e11405671c4d8907..a073708a184d6392a4eead69272e684013f1c751 100644 --- a/paddle/trainer/tests/sample_trainer_config_branch_net.conf +++ b/paddle/trainer/tests/sample_trainer_config_branch_net.conf @@ -17,7 +17,7 @@ from paddle.trainer_config_helpers import * ################################### Data Configuration ################################### TrainData(ProtoData(files = "trainer/tests/mnist.list")) ################################### Algorithm Configuration ################################### -settings(batch_size = 256, +settings(batch_size = 128, learning_method = MomentumOptimizer(momentum=0.5, sparse=False)) ################################### Network Configuration ################################### data = data_layer(name ="input", size=784) @@ -44,10 +44,11 @@ a2 = img_conv_layer(input=tmp, shared_biases=True, act=ReluActivation()) -tmp = concat_layer(input=[a1, a2]) +tmp = addto_layer(input=[a1, a2], + act=ReluActivation(), + bias_attr=False) tmp = img_pool_layer(input=tmp, - num_channels=64, pool_size=3, stride=2, padding=1, @@ -55,35 +56,34 @@ tmp = img_pool_layer(input=tmp, b1 = img_conv_layer(input=tmp, filter_size=3, - num_filters=64, + num_filters=32, padding=1, shared_biases=True, act=ReluActivation()) b1 = img_pool_layer(input=b1, pool_size=3, - stride=1, - padding=1, + stride=2, + padding=0, pool_type=MaxPooling()) b2 = img_conv_layer(input=tmp, - filter_size=5, + filter_size=3, num_filters=64, - padding=2, + padding=1, shared_biases=True, act=ReluActivation()) b2 = img_pool_layer(input=b2, pool_size=5, - stride=1, - padding=2, + stride=2, + padding=1, pool_type=MaxPooling()) -tmp = addto_layer(input=[b1, b2], - act=ReluActivation(), - bias_attr=False) +tmp = concat_layer(input=[b1, b2]) tmp = img_pool_layer(input=tmp, + num_channels=96, pool_size=3, stride=2, padding=1, diff --git a/paddle/trainer/tests/sample_trainer_config_simple_net.conf b/paddle/trainer/tests/sample_trainer_config_simple_net.conf index 77f78161535c49da4ef7fc1563cff58c021aecef..2ba71884d0953dc721808732fde12e695c6a757d 100644 --- a/paddle/trainer/tests/sample_trainer_config_simple_net.conf +++ b/paddle/trainer/tests/sample_trainer_config_simple_net.conf @@ -17,7 +17,7 @@ from paddle.trainer_config_helpers import * ################################### Data Configuration ################################### TrainData(ProtoData(files = "trainer/tests/mnist.list")) ################################### Algorithm Configuration ################################### -settings(batch_size = 1000, +settings(batch_size = 128, learning_method = MomentumOptimizer(momentum=0.5, sparse=False)) ################################### Network Configuration ################################### data = data_layer(name ="input", size=784)