diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f577616230be65e9581cf8f3ed5f63a77c7c3e21 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp @@ -0,0 +1,318 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNBatchNormLayer.h" + +using namespace mkldnn; // NOLINT +typedef memory::format format; + +namespace paddle { + +REGISTER_LAYER(mkldnn_batch_norm, MKLDNNBatchNormLayer); + +const real MKLDNNBatchNormLayer::EPS = 1E-5; + +bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!MKLDNNLayer::init(layerMap, parameterMap)) { + return false; + } + + // first one is input layer + // the other two are created in config_parser.py saving moving mean and var + CHECK_EQ(inputLayers_.size(), 3U); + CHECK_EQ(inputLayers_.size(), parameters_.size()); + CHECK_EQ(inputLayers_.size(), size_t(config_.inputs_size())); + + const ImageConfig& conf = config_.inputs(0).image_conf(); + ic_ = conf.channels(); + ih_ = inputLayers_[0]->getOutput().getFrameHeight(); + iw_ = inputLayers_[0]->getOutput().getFrameWidth(); + if (iw_ == 0 && ih_ == 0) { + iw_ = conf.img_size(); + ih_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); + } + oc_ = ic_; + oh_ = ih_; + ow_ = iw_; + if (config_.has_use_global_stats()) { + useGlobalStats_ = config_.use_global_stats(); + } + movingAvgFraction_ = config_.moving_average_fraction(); + VLOG(MKLDNN_BASE) << "--- " << (useGlobalStats_ ? "use" : "do not use") + << " --- global stats"; + VLOG(MKLDNN_BASE) << "Moving average fraction: " << movingAvgFraction_; + + initWeight(); + movingMean_.reset(new Weight(oc_, 1, parameters_[1], 0)); + movingVar_.reset(new Weight(oc_, 1, parameters_[2], 0)); + return true; +} + +void MKLDNNBatchNormLayer::initWeight() { + weight_.reset(new Weight(1, oc_, parameters_[0])); + if (biasParameter_.get() != NULL) { + biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_)); + } + CHECK_EQ(weight_ != nullptr, biases_ != nullptr) + << "only support have both weight and bias, or neither"; + if (weight_ && weight_->getW()) { + CHECK(biases_ && biases_->getW()); + valueScaleShift_ = Matrix::create(2, oc_, false, false); + valueScaleShift_->zeroMem(); + VectorPtr scale(new CpuVector(oc_, valueScaleShift_->getMemoryHandle(), 0)); + VectorPtr shift( + new CpuVector(oc_, valueScaleShift_->getMemoryHandle(), oc_)); + const VectorPtr& wgt = parameters_[0]->getBuf(PARAMETER_VALUE); + const VectorPtr& bias = biasParameter_->getBuf(PARAMETER_VALUE); + scale->copyFrom(*wgt); + shift->copyFrom(*bias); + wgt->setData(valueScaleShift_->getData()); + bias->setData(valueScaleShift_->getData() + oc_); + } + if (weight_ && weight_->getWGrad()) { + CHECK(biases_ && biases_->getWGrad()); + gradScaleShift_ = Matrix::create(2, oc_, false, false); + gradScaleShift_->zeroMem(); + const VectorPtr& wgt = parameters_[0]->getBuf(PARAMETER_GRADIENT); + const VectorPtr& bias = biasParameter_->getBuf(PARAMETER_GRADIENT); + wgt->setData(gradScaleShift_->getData()); + bias->setData(gradScaleShift_->getData() + oc_); + } +} + +void MKLDNNBatchNormLayer::convertWeightsFromPaddle() { + if (hasInitedWgt_) { + return; + } + // prepare mean and var if necessary + if (useGlobalStats_) { + CHECK(mean_); + CHECK(var_); + mean_->copyFrom(*(movingMean_->getW())); + var_->copyFrom(*(movingVar_->getW())); + } + hasInitedWgt_ = true; +} + +void MKLDNNBatchNormLayer::calMovingMeanAndVar() { + // calculating and saving moving mean and variance + CHECK_EQ(useGlobalStats_, false); + movingMean_->getW()->add( + *mean_, movingAvgFraction_, 1.0 - movingAvgFraction_); + // here var is v^2 + movingVar_->getW()->add(*var_, movingAvgFraction_, 1.0 - movingAvgFraction_); +} + +void MKLDNNBatchNormLayer::reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + reshapeInput(bs, ih, iw); + oh = ih; + ow = ow; + // ic_ and oc can not be changed + CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic) + << "Input channel can not be changed"; + reshapeOutput(oh, ow); + resizeOutput(bs, oc * oh * ow); + printSizeInfo(); +} + +void MKLDNNBatchNormLayer::resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + // In training phase, it will always calculate mean and var, + // so useGlobalStats must be false. + // In scoring phase, it depends on useGlobalStats choice. + if (passType_ != PASS_TEST && useGlobalStats_ == true) { + LOG(WARNING) << "use_global_stats is invalid setting in training phase"; + useGlobalStats_ = false; + } + + resetFwdBuffers(in, wgt, out); + + resetFwdPD(fwdPD_, in, wgt, out); + + resetFwdPipeline(pipeline, fwdPD_, in, wgt, out); +} + +void MKLDNNBatchNormLayer::resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + std::shared_ptr pd; + + resetBwdBuffers(in, wgt, out); + + resetBwdPD(pd, in, wgt, out); + + resetBwdPipeline(pipeline, pd, in, wgt, out); +} + +void MKLDNNBatchNormLayer::forward(PassType passType) { + MKLDNNLayer::forward(passType); + + // calculate and save moving mean and variance + if (passType_ != PASS_TEST) { + calMovingMeanAndVar(); + } +} + +void MKLDNNBatchNormLayer::updateWeights(const UpdateCallback& callback) { + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); + } +} + +void MKLDNNBatchNormLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& out) { + resetInValue(in); + + memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; + CHECK(in); + auto outPD = + MKLDNNMatrix::createPrimitiveDesc(outDims, in->getFormat(), engine_); + resetOutValue(out, outPD); + + if (valueScaleShift_) { + auto pd = MKLDNNMatrix::createPrimitiveDesc({2, oc_}, format::nc, engine_); + resetWithMatrix(wgt, valueScaleShift_, pd); + } + if (passType_ != PASS_TEST || useGlobalStats_) { + auto pd = MKLDNNMatrix::createPrimitiveDesc({oc_}, format::x, engine_); + mean_ = MKLDNNMatrix::create(pd); + var_ = MKLDNNMatrix::create(pd); + } +} + +void MKLDNNBatchNormLayer::resetFwdPD( + std::shared_ptr& pd, + MKLDNNMatrixPtr in, + MKLDNNMatrixPtr wgt, + MKLDNNMatrixPtr out) { + flags_ = 0u; + prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring + : prop_kind::forward_training; + if (useGlobalStats_) { + flags_ = (flags_ | batch_normalization_flag::use_global_stats); + } + if (wgt) { + flags_ = (flags_ | batch_normalization_flag::use_scale_shift); + } + auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), EPS, flags_); + pd.reset(new bn_fwd::primitive_desc(fwdDesc, engine_)); + // TODO(TJ): use check macro + CHECK(out); + CHECK(out->getPrimitiveDesc() == pd->dst_primitive_desc()); + if (wgt) { + CHECK(wgt->getPrimitiveDesc() == pd->weights_primitive_desc()); + } + if (passType_ != PASS_TEST || useGlobalStats_) { + CHECK(mean_); + CHECK(mean_->getPrimitiveDesc() == pd->mean_primitive_desc()); + CHECK(var_); + CHECK(var_->getPrimitiveDesc() == pd->variance_primitive_desc()); + } +} + +void MKLDNNBatchNormLayer::resetFwdPipeline( + std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& out) { + if (passType_ == PASS_TEST) { + if (useGlobalStats_) { + fwd_.reset(wgt != nullptr ? new bn_fwd(*pd, + *in, + (const primitive::at)(*mean_), + (const primitive::at)(*var_), + *wgt, + *out) + : new bn_fwd(*pd, + *in, + (const primitive::at)(*mean_), + (const primitive::at)(*var_), + *out)); + } else { + fwd_.reset(wgt != nullptr ? new bn_fwd(*pd, *in, *wgt, *out) + : new bn_fwd(*pd, *in, *out)); + } + } else { + CHECK_EQ(useGlobalStats_, false) + << "useGlobalStats should be false in training"; + fwd_.reset(wgt != nullptr ? new bn_fwd(*pd, *in, *wgt, *out, *mean_, *var_) + : new bn_fwd(*pd, *in, *out, *mean_, *var_)); + } + pipeline.push_back(*fwd_); +} + +void MKLDNNBatchNormLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& out) { + CHECK(inVal_ && outVal_); + resetOutGrad(out, outVal_->getPrimitiveDesc()); + resetInGrad(in, inVal_->getPrimitiveDesc()); + if (gradScaleShift_) { + CHECK(wgtVal_); + resetWithMatrix(wgt, gradScaleShift_, wgtVal_->getPrimitiveDesc()); + } +} + +void MKLDNNBatchNormLayer::resetBwdPD( + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& out) { + pd = nullptr; + if (in == nullptr) { + return; + } + CHECK(out); + CHECK(out->getPrimitiveDesc() == in->getPrimitiveDesc()); + auto md = in->getMemoryDesc(); + auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, EPS, flags_); + pd.reset(new bn_bwd::primitive_desc(bwdDesc, engine_, *fwdPD_)); + // TODO(TJ): use check macro + CHECK(wgt); + CHECK(wgt->getPrimitiveDesc() == pd->diff_weights_primitive_desc()); + CHECK(pd->weights_primitive_desc() == fwdPD_->weights_primitive_desc()); + CHECK(mean_); + CHECK(mean_->getPrimitiveDesc() == pd->mean_primitive_desc()); + CHECK(var_); + CHECK(var_->getPrimitiveDesc() == pd->variance_primitive_desc()); +} + +void MKLDNNBatchNormLayer::resetBwdPipeline( + std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& out) { + if (pd == nullptr) { + return; + } + CHECK(inVal_); + bwdData_.reset( + wgt && wgtVal_ + ? new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *wgtVal_, *in, *wgt) + : new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *in)); + pipeline.push_back(*bwdData_); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.h b/paddle/gserver/layers/MKLDNNBatchNormLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..456c0424ecb8dde17f98a900c5d77268cc672e34 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.h @@ -0,0 +1,138 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "MKLDNNLayer.h" +#include "mkldnn.hpp" + +namespace paddle { +typedef mkldnn::batch_normalization_forward bn_fwd; +typedef mkldnn::batch_normalization_backward bn_bwd; + +/** + * @brief A subclass of MKLDNNLayer BatchNorm layer. + * + * The config file api is mkldnn_batch_norm + */ +class MKLDNNBatchNormLayer : public MKLDNNLayer { +protected: + // save forward primitive_desc, which can be used backward + std::shared_ptr fwdPD_; + + // Epsilon value used in the batch normalization formula. + static const real EPS; + // weight and bias in paddle + std::unique_ptr weight_; + std::unique_ptr biases_; + // mkldnn use a large buffer store both scale and shift + // which are weight and bias in paddle corresponding. + MatrixPtr valueScaleShift_; + MatrixPtr gradScaleShift_; + // Moving average of mean. + std::unique_ptr movingMean_; + // Moving average of variance. + std::unique_ptr movingVar_; + + // if useGlobalStats_ is true, will use the loaded mean and variance. + // otherwise, calculate mean and variance in every mini-batch. + bool useGlobalStats_; + // used in MKLDNN primitive desc + unsigned flags_; + // use to compute moving mean and variance. + real movingAvgFraction_; + // whether the weight has been init + bool hasInitedWgt_; + + // local mean and variance + // when useGlobalStats_ they are loaded from moving mean and variance + // when do not useGlobalStats_ they are calculated from this mini-batch + MKLDNNMatrixPtr mean_; + MKLDNNMatrixPtr var_; + +public: + explicit MKLDNNBatchNormLayer(const LayerConfig& config) + : MKLDNNLayer(config), useGlobalStats_(true), hasInitedWgt_(false) {} + + ~MKLDNNBatchNormLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + + void reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + + void resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; + + void resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; + + void updateWeights(const UpdateCallback& callback) override; + + void convertWeightsFromPaddle() override; + +protected: + void initWeight(); + /** + * cal moving mean and variance. + * moving = moving * AvgFraction + local * (1 - AvgFraction) + */ + void calMovingMeanAndVar(); + /** + * Forward functions: reset buffers(input, weight, output), + * reset primitive descriptor, + * reset pipeline. + */ + void resetFwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& out); + void resetFwdPD(std::shared_ptr& pd, + MKLDNNMatrixPtr in, + MKLDNNMatrixPtr wgt, + MKLDNNMatrixPtr out); + void resetFwdPipeline(std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& out); + + /** + * Backward functions: reset buffers(input, weight, output), + * reset primitive descriptor, + * reset pipeline. + */ + void resetBwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& out); + void resetBwdPD(std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& out); + void resetBwdPipeline(std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& out); +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index 0a19fe23336ea943cb8a572dc40f8c0fbbd7236a..73b7e8857f35d194e71b2b5b341f89b77fd1f8b0 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -91,10 +91,16 @@ void MKLDNNTester::setInputImgSize() { // init randome parameters of ref, and copy to mkldnn void MKLDNNTester::randomWgtDatas() { EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); + const bool isBN = refLayer_->getType() == "batch_norm"; for (size_t i = 0; i < parameters_[REF].size(); ++i) { const VectorPtr& dnnValue = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); const VectorPtr& refValue = parameters_[REF][i]->getBuf(PARAMETER_VALUE); parameters_[REF][i]->randomize(); + if (isBN && i == 2) { + // this param is moving average in batch norm, which must larger than 0 + real offset = fabs(refValue->getMin()) + 1.0; + refValue->add(offset); + } dnnValue->copyFrom(*refValue); VLOG(MKLDNN_TESTS) << "Random weight " << parameters_[DNN][i]->getName(); @@ -132,8 +138,7 @@ void MKLDNNTester::checkForward() { void MKLDNNTester::checkBackwardData() { VLOG(MKLDNN_TESTS) << "Check Backward Data"; - // TODO(TJ): uncomment me when batch norm ready - // const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; + const bool isBN = refLayer_->getType() == "batch_norm"; for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad(); const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad(); @@ -144,11 +149,11 @@ void MKLDNNTester::checkBackwardData() { double delta = compareMatrix(dnnDiff, refDiff); EXPECT_LE(fabs(delta), eps_); - // TODO(TJ): uncomment me when batch norm ready - // if (isBN) { - // // the other two inputs in batch norm are for moving mean and var - // break; - // } + if (isBN) { + // the other two inputs in batch norm are for moving mean and var + // do not have grad to compare + break; + } } } @@ -308,10 +313,14 @@ double MKLDNNTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) { void MKLDNNTester::runOnce() { // test forward randomBotDatas(); - dnnLayer_->forward(PASS_TRAIN); - refLayer_->forward(PASS_TRAIN); + dnnLayer_->forward(passType_); + refLayer_->forward(passType_); checkForward(); + if (passType_ == PASS_TEST) { + return; + } + // test backward // simple updater UpdateCallback updateCallback = [](Parameter* para) { @@ -343,6 +352,7 @@ void MKLDNNTester::run(const TestConfig& dnn, size_t batchSize, size_t inputImgH, size_t inputImgW, + PassType passType, bool printDetails, size_t iter, float epsilon) { @@ -361,6 +371,7 @@ void MKLDNNTester::run(const TestConfig& dnn, ih_ = inputImgH; iw_ = inputImgW; + passType_ = passType; log_ = printDetails; iter_ = iter; eps_ = epsilon; diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h index c385d1c72717d120211f167b5c5eb9a557da3714..19d8848f74f2ee4a809e42164a0eb180abd2a4e1 100644 --- a/paddle/gserver/tests/MKLDNNTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -62,12 +62,15 @@ protected: float eps_; /// input image size, default 1 size_t ih_, iw_; + /// passType, PASS_TRAIN, PASS_TEST or PASS_GC (Gradient Check pass) + PassType passType_; public: explicit MKLDNNTester(size_t iter = 3, float epsilon = 1e-4) { iter_ = iter; eps_ = epsilon; log_ = false; + passType_ = PASS_TRAIN; } ~MKLDNNTester() {} @@ -78,6 +81,7 @@ public: size_t batchSize, size_t inputImgH = 1, size_t inputImgW = 1, + PassType passType = PASS_TRAIN, bool printDetails = false, size_t iter = 3, float epsilon = 1e-4); diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index 6cb4ca5e08eab5b979e404c9e09dcfec11086c22..85d4f437c2664135a7975c6ed3270d8f1ddbeaf4 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -212,6 +212,66 @@ TEST(MKLDNNLayer, PoolLayer) { testPoolLayer({2, 8, 56, 56, 29, 29, 3, 3, 1, 1, 2, 2}); } +struct testBatchNormDesc { + int bs; + int ic; + int ih, iw; +}; + +static void getMKLDNNBatchNormConfig(TestConfig& cfg, + const testBatchNormDesc& pm) { + cfg.layerConfig.set_size(pm.ic * pm.ih * pm.iw); + cfg.layerConfig.set_type("mkldnn_batch_norm"); + cfg.biasSize = pm.ic; + cfg.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), + /* size of weight= */ size_t(pm.ic)}); + cfg.inputDefs.push_back( + {INPUT_DATA, "layer_1_moving_mean", 1, size_t(pm.ic)}); + cfg.inputDefs.back().isStatic = true; + cfg.inputDefs.push_back({INPUT_DATA, "layer_2_moving_var", 1, size_t(pm.ic)}); + cfg.inputDefs.back().isStatic = true; + LayerInputConfig* input = cfg.layerConfig.add_inputs(); + // TODO(TJ): uncomment me when refine and support comparing all zeroes vector + // cfg.layerConfig.set_active_type("relu"); + cfg.layerConfig.add_inputs(); + cfg.layerConfig.add_inputs(); + ImageConfig* img_conf = input->mutable_image_conf(); + img_conf->set_channels(pm.ic); + img_conf->set_img_size_y(pm.ih); + img_conf->set_img_size(pm.iw); +} + +void testBatchNormLayer(const testBatchNormDesc& pm) { + TestConfig dnnConfig; + getMKLDNNBatchNormConfig(dnnConfig, pm); + TestConfig refConfig = dnnConfig; + refConfig.layerConfig.set_type("batch_norm"); + // for PASS_TRAIN, use_global_stats always should be false, and batchsize != 1 + VLOG(MKLDNN_TESTS) << "check train phase"; + dnnConfig.layerConfig.set_use_global_stats(false); + refConfig.layerConfig.set_use_global_stats(false); + MKLDNNTester tester; + tester.run(dnnConfig, refConfig, pm.bs, pm.ih, pm.iw, PASS_TRAIN); + // for PASS_TEST, check use_global_stats true and false, and batchsize 1 + VLOG(MKLDNN_TESTS) << "check test phase"; + for (auto useGS : {false, true}) { + dnnConfig.layerConfig.set_use_global_stats(useGS); + refConfig.layerConfig.set_use_global_stats(useGS); + MKLDNNTester tester; + for (auto bs : {pm.bs, 1}) { + tester.run(dnnConfig, refConfig, bs, pm.ih, pm.iw, PASS_TEST); + } + } +} + +TEST(MKLDNNLayer, BatchNormLayer) { + testBatchNormLayer({4, 10, 6, 6}); + testBatchNormLayer({16, 32, 16, 16}); +} + struct testActDesc { int bs, ic, ih, iw; }; diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index fe755d096da9713e39581a909e5d21aa93d69f0f..2b62d4e11ac7276924947ab47360ffca84240aea 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -91,6 +91,11 @@ public: const MKLDNNMatrixPtr& dst, bool checkData = true); + void copyFrom(const Matrix& src) { + // TODO(TJ): reorder data if this format is not nchw or x + m_->copyFrom(src); + } + public: /** * Reorder this MKLDNNMatrix from other format. diff --git a/paddle/trainer/tests/sample_trainer_config_branch_net.conf b/paddle/trainer/tests/sample_trainer_config_branch_net.conf index a073708a184d6392a4eead69272e684013f1c751..3d8fb77a11958218091d2ee72e1d5a40ad1d9f5b 100644 --- a/paddle/trainer/tests/sample_trainer_config_branch_net.conf +++ b/paddle/trainer/tests/sample_trainer_config_branch_net.conf @@ -89,6 +89,36 @@ tmp = img_pool_layer(input=tmp, padding=1, pool_type=MaxPooling()) +tmp = img_conv_layer(input=tmp, + filter_size=3, + num_filters=32, + padding=1, + shared_biases=True, + act=LinearActivation(), + bias_attr=False) + +tmp = batch_norm_layer(input=tmp, + use_global_stats=False, + act=ReluActivation()) + +c1 = img_conv_layer(input=tmp, + filter_size=1, + num_filters=32, + padding=0, + shared_biases=True, + act=ReluActivation()) + +c2 = img_conv_layer(input=tmp, + filter_size=3, + num_filters=32, + padding=1, + shared_biases=True, + act=ReluActivation()) + +tmp = addto_layer(input=[c1, c2], + act=ReluActivation(), + bias_attr=False) + tmp = fc_layer(input=tmp, size=64, bias_attr=False, act=TanhActivation()) diff --git a/paddle/trainer/tests/sample_trainer_config_simple_net.conf b/paddle/trainer/tests/sample_trainer_config_simple_net.conf index 2ba71884d0953dc721808732fde12e695c6a757d..c615b5622b7e50b7aa99a9fcf9f63d7b4351417c 100644 --- a/paddle/trainer/tests/sample_trainer_config_simple_net.conf +++ b/paddle/trainer/tests/sample_trainer_config_simple_net.conf @@ -38,9 +38,14 @@ tmp = img_pool_layer(input=tmp, tmp = img_conv_layer(input=tmp, filter_size=3, - num_filters=64, + num_filters=32, padding=1, shared_biases=True, + act=LinearActivation(), + bias_attr=False) + +tmp = batch_norm_layer(input=tmp, + use_global_stats=False, act=ReluActivation()) tmp = img_pool_layer(input=tmp, diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 09c92d3513e86a7657880c01736f5f41f53cfcf6..e88e962cff5bbfcb8be1014dbaab85568d2625ff 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2420,6 +2420,7 @@ class BatchNormLayer(LayerBase): # If not use is_static, even set learning_rate = 0, decay_rate = 0, # these paras will change if set average_window in configure. use_gpu = bool(int(g_command_config_args.get("use_gpu", 0))) + use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) is_shared = True if not use_gpu else False for i in xrange(2): inputs.append( @@ -2433,11 +2434,17 @@ class BatchNormLayer(LayerBase): parallel_nn = bool(int(g_command_config_args.get("parallel_nn", 0))) cudnn_version = int(g_command_config_args.get("cudnn_version", 0)) - # Automatically select cudnn_batch_norm for GPU and batch_norm for CPU. - # Also based on cudnn version. + # Automatically select cudnn_batch_norm for GPU, batch_norm for CPU + # and mkldnn_batch_norm for MKLDNN. Also based on cudnn version. + if batch_norm_type == "mkldnn_batch_norm": + config_assert(use_mkldnn, "mkldnn_batch_norm only support MKLDNN") use_cudnn = use_gpu and batch_norm_type != "batch_norm" and \ + not use_mkldnn and batch_norm_type != "mkldnn_batch_norm" and \ ((not parallel_nn) or self.config.device > -1) - self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm" + if use_cudnn: + self.layer_type = "cudnn_batch_norm" + else: + self.layer_type = "mkldnn_batch_norm" if use_mkldnn else "batch_norm" super(BatchNormLayer, self).__init__( name, self.layer_type, 0, inputs=inputs, **xargs) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 09315b9d9224076d91c16a6c0b949d4ab289bf70..cc1b34df9e7cf8d17bafeb57624548de017066e9 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -3014,16 +3014,19 @@ def batch_norm_layer(input, :param input: batch normalization input. Better be linear activation. Because there is an activation inside batch_normalization. :type input: LayerOutput - :param batch_norm_type: We have batch_norm and cudnn_batch_norm. batch_norm - supports both CPU and GPU. cudnn_batch_norm requires - cuDNN version greater or equal to v4 (>=v4). But - cudnn_batch_norm is faster and needs less memory - than batch_norm. By default (None), we will - automaticly select cudnn_batch_norm for GPU and - batch_norm for CPU. Otherwise, select batch norm - type based on the specified type. If you use cudnn_batch_norm, + :param batch_norm_type: We have batch_norm, mkldnn_batch_norm and cudnn_batch_norm. + batch_norm supports CPU, MKLDNN and GPU. cudnn_batch_norm + requires cuDNN version greater or equal to v4 (>=v4). + But cudnn_batch_norm is faster and needs less + memory than batch_norm. mkldnn_batch_norm requires + enable use_mkldnn. By default (None), we will + automaticly select cudnn_batch_norm for GPU, + mkldnn_batch_norm for MKLDNN and batch_norm for CPU. + Otherwise, select batch norm type based on the + specified type. If you use cudnn_batch_norm, we suggested you use latest version, such as v5.1. :type batch_norm_type: None | string, None or "batch_norm" or "cudnn_batch_norm" + or "mkldnn_batch_norm" :param act: Activation Type. Better be relu. Because batch normalization will normalize input near zero. :type act: BaseActivation @@ -3063,6 +3066,7 @@ def batch_norm_layer(input, else: num_channels = input.size assert (batch_norm_type is None) or (batch_norm_type == "batch_norm") or \ + (batch_norm_type == "mkldnn_batch_norm") or \ (batch_norm_type == "cudnn_batch_norm") l = Layer( name=name,