diff --git a/CMakeLists.txt b/CMakeLists.txt index 811b004162dca0cdf593793e5f1c4139d0cbf4f3..c75b83e50cf9cef8290c37f88b38cdc3d77df39c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,7 +144,7 @@ if(WITH_GPU) endif(WITH_GPU) if(WITH_MKLDNN) - list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB}) + list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB}) endif() if(USE_NNPACK) diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 0012636b8f618a1b45cfc801c04781e67694956f..62cff9361ccba3ae3b9359ddb932f5b26146eb97 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -23,6 +23,17 @@ endmacro() filter_test(GSERVER_HEADER) filter_test(GSERVER_SOURCES) + +if(NOT WITH_MKLDNN) + file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") + file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") + list(REMOVE_ITEM GSERVER_HEADER ${DNN_HEADER}) + list(REMOVE_ITEM GSERVER_SOURCES ${DNN_SOURCES}) + message(STATUS "Skip compiling with MKLDNNLayers and MKLDNNActivations") +else() + message(STATUS "Compile with MKLDNNLayers and MKLDNNActivations") +endif() + if(NOT WITH_GPU) list(REMOVE_ITEM GSERVER_HEADER layers/CudnnConvBaseLayer.h diff --git a/paddle/gserver/layers/MKLDNNBase.h b/paddle/gserver/layers/MKLDNNBase.h new file mode 100644 index 0000000000000000000000000000000000000000..4c0234e7b3a91053596c32cea581fa5d1e26b9d5 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNBase.h @@ -0,0 +1,97 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "mkldnn.hpp" + +namespace paddle { + +typedef enum { + MKLDNN_BASE = 1, // basical info of MKLDNN + MKLDNN_TESTS = 1, // gtest info of MKLDNN + MKLDNN_SIZES = 2, // size info of MKLDNN + MKLDNN_FMTS = 3, // format info of MKLDNN + MKLDNN_ALL = 4, // show all info of MKLDNN +} MKLDNN_LOG_LEVEL; + +/** + * @brief MKLDNN CPU engine. + * + */ +class CPUEngine { +public: + static CPUEngine& Instance() { + // Thread-safe in C++11. + static CPUEngine myInstance; + return myInstance; + } + + // Disallow copy or move + CPUEngine(const CPUEngine&) = delete; // Copy constructor + CPUEngine(CPUEngine&&) = delete; // Move constructor + CPUEngine& operator=(const CPUEngine&) = delete; // Copy assignment + CPUEngine& operator=(CPUEngine&&) = delete; // Move assignment + + mkldnn::engine& getEngine() { return cpuEngine_; } + +protected: + CPUEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {} + // CPUEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {} + ~CPUEngine() {} + +private: + mkldnn::engine cpuEngine_; +}; + +/** + * @brief MKLDNN Stream. + * + */ +class MKLDNNStream { +public: + MKLDNNStream() : ready_(false) { resetState(); } + + virtual ~MKLDNNStream() {} + + /** + * @brief Submit stream + * @param prims The primitives vector + * @param block Waiting for the stream to complete + */ + void submit(std::vector& prims, bool block = true) { + resetState(); + stream_->submit(prims).wait(block); + ready_ = false; + } + + /** + * @brief Reset the mkldnn stream + */ + void resetState() { + if (ready_) { + return; + } + // TODO(TJ): change me when mkldnn have method to reset this state + // stream_.reset(new mkldnn::stream(mkldnn::stream::kind::lazy)); + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); + ready_ = true; + } + +private: + bool ready_; + std::shared_ptr stream_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..30f567eaf8248a8fba1b461a2bdbf2aab13f9e08 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -0,0 +1,282 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNFcLayer.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +using namespace mkldnn; // NOLINT +typedef memory::format format; +typedef inner_product_forward fc_fwd; +typedef inner_product_backward_weights fc_bwdWgt; +typedef inner_product_backward_data fc_bwdData; + +namespace paddle { + +REGISTER_LAYER(mkldnn_fc, MKLDNNFcLayer); + +bool MKLDNNFcLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!MKLDNNLayer::init(layerMap, parameterMap)) { + return false; + } + + CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet"; + CHECK_EQ(inputLayers_.size(), parameters_.size()); + CHECK(!parameters_[0]->isSparse()) << "Do not support sparse yet"; + + // output size, cat not be changed + oc_ = getSize(); + oh_ = 1; + ow_ = 1; + + // input size can not change in FC + iLayerSize_ = inputLayers_[0]->getSize(); + CHECK_EQ(parameters_[0]->getSize(), iLayerSize_ * oc_); + + // create weight + weight_ = + std::unique_ptr(new Weight(oc_, iLayerSize_, parameters_[0], 0)); + + // create biases + if (biasParameter_.get() != NULL) { + biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_)); + } + return true; +} + +void MKLDNNFcLayer::convertWeightsFromPaddle() { + if (FLAGS_use_mkldnn_wgt) { + return; + } + + if (hasInitedWgt_) { + return; + } + + // The weight_ is transposed from initial paddle weight + MatrixPtr paddleWgt = Matrix::create( + weight_->getW()->getData(), iLayerSize_, oc_, false, false); + + // TODO(TJ): remove this print when do not need differ weights + std::ostringstream ostr; + paddleWgt->print(ostr); + VLOG(MKLDNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str(); + + // The mkldnn weight is transposed from initial paddle matrix + MatrixPtr paddleWgtT; + paddleWgt->transpose(paddleWgtT, true); + weight_->getW()->copyFrom(*paddleWgtT); + hasInitedWgt_ = true; +} + +void MKLDNNFcLayer::convertWeightsToPaddle() { + MatrixPtr dnnWgt = weight_->getW(); + MatrixPtr paddleWgt; + dnnWgt->transpose(paddleWgt, true); + + // copy paddle weight and override on weight_ + MatrixPtr dnnWgtT = Matrix::create( + dnnWgt->getData(), dnnWgt->getWidth(), dnnWgt->getHeight(), false, false); + dnnWgtT->copyFrom(*paddleWgt); +} + +void MKLDNNFcLayer::reshape() { + const Argument& input = getInput(0); + int batchSize = input.getBatchSize(); + if (bs_ == batchSize) { + return; + } + bs_ = batchSize; + ih_ = input.getFrameHeight(); + iw_ = input.getFrameWidth(); + if (ih_ == 0) { + ih_ = 1; + } + if (iw_ == 0) { + iw_ = 1; + } + hasSpatial_ = true; + if (ih_ == 1 && iw_ == 1) { + hasSpatial_ = false; + } + CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); + ic_ = iLayerSize_ / (ih_ * iw_); + CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; + CHECK_EQ(size_t(oc_), getSize()); + printSizeInfo(); + + // reset output + output_.setFrameHeight(oh_); + output_.setFrameWidth(ow_); + resetOutput(bs_, oc_); + + // reset mkldnn forward + resetFwd(); + needResetBwd_ = true; + + convertWeightsFromPaddle(); +} + +void MKLDNNFcLayer::resetFwd() { + bool hasBias = biases_ && biases_->getW(); + real* iData = getInputValue(0)->getData(); + real* oData = getOutputValue()->getData(); + real* wData = weight_->getW()->getData(); + real* bData = hasBias ? biases_->getW()->getData() : NULL; + + // TODO(TJ): below create should be covered in MkldnnMatrix + // create memory desc + memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) + : createMD({bs_, ic_}, format::nc); + memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) + : createMD({oc_, ic_}, format::oi); + memory::desc bMD = bData != NULL ? createMD({oc_}, format::x) + : createMD({}, format::format_undef); + memory::desc oMD = createMD({bs_, oc_}, format::nc); + + // create memory primitive desc and memory self + inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData)); + outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData)); + + prop_kind pk = prop_kind::forward; + fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD) + : fc_fwd::desc(pk, iMD, wMD, oMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + + if (bData != NULL) { + biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData)); + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); + } else { + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); + } + pipelineFwd_.clear(); + pipelineFwd_.push_back(*fwd_); +} + +void MKLDNNFcLayer::resetBwd() { + if (!needResetBwd_) { + return; + } + needResetBwd_ = false; + + bool hasBias = biases_ && biases_->getWGrad(); + real* iData = getInputValue(0)->getData(); + real* iDiff = getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; + real* oDiff = getOutputGrad()->getData(); + real* wDiff = weight_->getWGrad()->getData(); + real* bDiff = hasBias ? biases_->getWGrad()->getData() : NULL; + + /// backward weight + // create memory desc for backward memory + memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) + : createMD({bs_, ic_}, format::nc); + memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) + : createMD({oc_, ic_}, format::oi); + memory::desc oMD = createMD({bs_, oc_}, format::nc); + memory::desc bMD = bDiff != NULL ? createMD({oc_}, format::x) + : createMD({}, format::format_undef); + + if (inVal_) { + // update data + inVal_->set_data_handle(iData); + } else { + inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + } + + // create memory primitive desc and memory self + wgtGrad_.reset(new memory(memory::primitive_desc(wMD, engine_), wDiff)); + outGrad_.reset(new memory(memory::primitive_desc(oMD, engine_), oDiff)); + + fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, iMD, wMD, oMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + fc_bwdWgt::desc bwdWgtDesc = bDiff != NULL + ? fc_bwdWgt::desc(iMD, wMD, bMD, oMD) + : fc_bwdWgt::desc(iMD, wMD, oMD); + fc_bwdWgt::primitive_desc bwdWgtPD = + fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); + + if (bDiff != NULL) { + biasGrad_.reset(new memory(memory::primitive_desc(bMD, engine_), bDiff)); + bwdWgt_.reset( + new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); + } else { + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); + } + pipelineBwd_.clear(); + pipelineBwd_.push_back(*bwdWgt_); + + /// backward data + if (iDiff == NULL) { + return; + } + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(iMD, wMD, oMD); + fc_bwdData::primitive_desc bwdDataPD = + fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); + inGrad_.reset(new memory(memory::primitive_desc(iMD, engine_), iDiff)); + CHECK(wgtVal_) << "Should have weight memory"; + bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); + pipelineBwd_.push_back(*bwdData_); +} + +void MKLDNNFcLayer::forward(PassType passType) { + Layer::forward(passType); + reshape(); + + { + REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); + + // update input data + // since it might be changed if this is after data layer + real* iData = getInputValue(0)->getData(); + inVal_->set_data_handle(iData); + + // just submit forward pipeline + stream_->submit(pipelineFwd_); + } + + /* activation */ { + REGISTER_TIMER_INFO("FwActTimer", getName().c_str()); + forwardActivation(); + } +} + +void MKLDNNFcLayer::backward(const UpdateCallback& callback) { + /* Do derivation */ { + REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); + backwardActivation(); + } + + { + REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); + resetBwd(); + + // update diff + real* oDiff = getOutputGrad()->getData(); + outGrad_->set_data_handle(oDiff); + + // just sumbmit backward pipeline + stream_->submit(pipelineBwd_); + } + + { + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); + } + } +} +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..7954852a23f81d36d5fb0ae6a19768f419886fb1 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "MKLDNNLayer.h" +#include "mkldnn.hpp" + +namespace paddle { + +/** + * @brief A subclass of MKLDNNLayer fc layer. + * + * The config file api is mkldnn_fc + */ +class MKLDNNFcLayer : public MKLDNNLayer { +protected: + // input layer size, can not be change after init + size_t iLayerSize_; // == ic * ih * iw + + // if has already init the weight + bool hasInitedWgt_; + + // if input layer has image size info (ih>1 && iw>1) + bool hasSpatial_; + + // fc weight and bias + std::unique_ptr weight_; + std::unique_ptr biases_; + +public: + explicit MKLDNNFcLayer(const LayerConfig& config) + : MKLDNNLayer(config), hasInitedWgt_(false), hasSpatial_(true) {} + + ~MKLDNNFcLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void convertWeightsFromPaddle() override; + + void convertWeightsToPaddle() override; + + void forward(PassType passType) override; + + void backward(const UpdateCallback& callback) override; + +protected: + /** + * reshape the input image sizes + * and reset output buffer size + * and reset mkldnn forward + */ + void reshape(); + + /** + * reset the forward primitve and memory + * only would be called when input size changes + */ + void resetFwd(); + + /** + * reset the backward primitve and memory for mkldnn fc + * only would be called when needed + */ + void resetBwd(); +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..63e29f447eede5ff9df8715bc9140b64ab7f7d17 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -0,0 +1,132 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "Layer.h" +#include "MKLDNNBase.h" +#include "mkldnn.hpp" + +DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); + +namespace paddle { + +class MKLDNNLayer; +typedef std::shared_ptr MKLDNNLayerPtr; + +/** + * @brief Base class of MKLDNNlayer. + * + */ +class MKLDNNLayer : public Layer { +protected: + // batch size + int bs_; + // input image channel, height and width + int ic_, ih_, iw_; + // output image channel, height and width + int oc_, oh_, ow_; + + // backward also need reset after reset forward handle + bool needResetBwd_; + + // mkldnn engine, stream and primivtives + mkldnn::engine engine_; + std::shared_ptr stream_; + std::shared_ptr fwd_; + std::shared_ptr bwdWgt_; + std::shared_ptr bwdData_; + std::vector pipelineFwd_; + std::vector pipelineBwd_; + + // TODO(TJ): change below memory as MKLDNNMatrixPtr type + std::shared_ptr inVal_; + std::shared_ptr inGrad_; + std::shared_ptr outVal_; + std::shared_ptr outGrad_; + std::shared_ptr wgtVal_; + std::shared_ptr wgtGrad_; + std::shared_ptr biasVal_; + std::shared_ptr biasGrad_; + +public: + explicit MKLDNNLayer(const LayerConfig& config) + : Layer(config), + bs_(0), + ic_(0), + ih_(0), + iw_(0), + oc_(0), + oh_(0), + ow_(0), + needResetBwd_(true), + engine_(mkldnn::engine::cpu, 0), + stream_(nullptr), + fwd_(nullptr), + bwdWgt_(nullptr), + bwdData_(nullptr) {} + + ~MKLDNNLayer() {} + + virtual bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!Layer::init(layerMap, parameterMap)) { + return false; + } + + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." + << "Please set WITH_MKLDNN=ON " + << "and set use_mkldnn=True"; + stream_.reset(new MKLDNNStream()); + engine_ = CPUEngine::Instance().getEngine(); + + // TODO(TJ): deivecId + return true; + } + + /** + * convert weight from paddle format to mkldnn format + * weight_ will be override + */ + virtual void convertWeightsFromPaddle() {} + + /** + * convert mkldnn weight to paddle format + * weight_ will be override + */ + virtual void convertWeightsToPaddle() {} + + /** + * print info about sizes + */ + virtual void printSizeInfo() { + VLOG(MKLDNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_ + << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_ + << ", oh: " << oh_ << ", ow: " << ow_; + } + + // TODO(TJ): move to MkldnnMatrix + // create memory desc + inline mkldnn::memory::desc createMD( + mkldnn::memory::dims dims, + mkldnn::memory::format fmt, + mkldnn::memory::data_type type = mkldnn::memory::data_type::f32) { + // TODO(TJ): isFmtSuppoted(fmt) + return mkldnn::memory::desc(dims, type, fmt); + } +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 294d5f115dc66f209490d7c11f76cac876255043..c2a2993620492a9ec5dae932ff1292ced2c00064 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -18,6 +18,15 @@ add_unittest_without_exec(test_LayerGrad add_test(NAME test_LayerGrad COMMAND test_LayerGrad) +########## test_Mkldnn layers and activations ########## +if(WITH_MKLDNN) + add_unittest_without_exec(test_MKLDNN + test_MKLDNN.cpp + MKLDNNTester.cpp + LayerGradUtil.cpp) + add_test(NAME test_MKLDNN COMMAND test_MKLDNN) +endif() + ################ test_CRFLayerGrad #################### add_unittest_without_exec(test_CRFLayerGrad test_CRFLayerGrad.cpp diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp new file mode 100644 index 0000000000000000000000000000000000000000..99c8c4948c9b05ad15d1217ebb70026bbd48453f --- /dev/null +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -0,0 +1,369 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNTester.h" +#include "paddle/gserver/layers/MKLDNNBase.h" +#include "paddle/gserver/layers/MKLDNNLayer.h" + +namespace paddle { + +// init data layer and test layer of both dnn and reference +void MKLDNNTester::reset(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize) { + const bool trans = false; + const bool useGpu = false; + + // clear + configs_.clear(); + layerNames_.clear(); + dataLayers_.clear(); + datas_.clear(); + layerMaps_.clear(); + parameters_.clear(); + testLayers_.clear(); + + // resize + configs_.resize(NUM); + layerNames_.resize(NUM); + dataLayers_.resize(NUM); + datas_.resize(NUM); + layerMaps_.resize(NUM); + parameters_.resize(NUM); + testLayers_.resize(NUM); + + // reset configs and layer names + configs_[DNN] = dnn; + configs_[REF] = ref; + layerNames_[DNN] = "mkldnn"; // the first is mkldnn layer + layerNames_[REF] = "reference"; // second is reference layer + + // reset others + for (size_t i = 0; i < NUM; ++i) { + configs_[i].layerConfig.set_name(layerNames_[i]); + initDataLayer(configs_[i], + &(dataLayers_[i]), + &(datas_[i]), + &(layerMaps_[i]), + layerNames_[i], + batchSize, + trans, + useGpu); + initTestLayer( + configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i])); + } + dnnLayer_ = testLayers_[DNN]; + refLayer_ = testLayers_[REF]; + EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size()); + EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); + + setInputImgSize(); +} + +void MKLDNNTester::setInputImgSize() { + for (size_t n = 0; n < dataLayers_.size(); ++n) { + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + // TODO(TJ): fix me when concat and elewise ready + dataLayers_[n][i]->getOutput().setFrameHeight(ih_); + dataLayers_[n][i]->getOutput().setFrameWidth(iw_); + } + } +} + +// init randome parameters of ref, and copy to mkldnn +void MKLDNNTester::randomWgtDatas() { + EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); + for (size_t i = 0; i < parameters_[REF].size(); ++i) { + const VectorPtr& dnnValue = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); + const VectorPtr& refValue = parameters_[REF][i]->getBuf(PARAMETER_VALUE); + parameters_[REF][i]->randomize(); + dnnValue->copyFrom(*refValue); + + VLOG(lvl_) << "Random weight data " << parameters_[DNN][i]->getName(); + printVector(dnnValue); + } +} + +// random botdata of ref layer and copy same to mkldnn +void MKLDNNTester::randomBotDatas() { + CHECK_EQ(dataLayers_.size(), NUM); + for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { + dataLayers_[REF][i]->getOutputValue()->randomizeUniform(); + dataLayers_[DNN][i]->getOutputValue()->copyFrom( + *(dataLayers_[REF][i]->getOutputValue())); + VLOG(lvl_) << "Input " << i << " data:"; + printMatrix(dataLayers_[REF][i]->getOutputValue()); + } +} + +void MKLDNNTester::randomTopDiffs() { + refLayer_->getOutputGrad()->randomizeUniform(); + dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad())); + VLOG(lvl_) << "Random dom Backward Input, TopDiff: "; + printMatrix(refLayer_->getOutputGrad()); +} + +void MKLDNNTester::checkForward() { + printTopDatas(); + double delta = compareMatrix(testLayers_[DNN]->getOutputValue(), + testLayers_[REF]->getOutputValue()); + VLOG(MKLDNN_ALL) << "Check Forward"; + EXPECT_LE(fabs(delta), eps_); +} + +void MKLDNNTester::checkBackwardData() { + // TODO(TJ): uncomment me when batch norm ready + // const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; + for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { + const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad(); + const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad(); + VLOG(lvl_) << "Mkldnn Backward Output BotDiff " << i; + printMatrix(dnnDiff); + VLOG(lvl_) << "Reference Backward Output BotDiff " << i; + printMatrix(refDiff); + + double delta = compareMatrix(dnnDiff, refDiff); + EXPECT_LE(fabs(delta), eps_); + // TODO(TJ): uncomment me when batch norm ready + // if (isBN) { + // // the other two inputs in batch norm are for moving mean and var + // break; + // } + } +} + +void MKLDNNTester::checkBackwardWgts() { + CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); + vector dnnWgts; // used to temply save mkldnn weights + saveWgt(parameters_[DNN], dnnWgts); + + const MKLDNNLayerPtr dnnlayer = + std::dynamic_pointer_cast(dnnLayer_); + CHECK(dnnlayer); + dnnlayer->convertWeightsToPaddle(); + for (size_t i = 0; i < parameters_[DNN].size(); ++i) { + const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); + const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); + VLOG(lvl_) << "Mkldnn Output weight " << parameters_[DNN][i]->getName(); + printVector(dnn); + VLOG(lvl_) << "Reference Output weight " << parameters_[REF][i]->getName(); + printVector(ref); + + double delta = compareVector(dnn, ref); + EXPECT_LE(fabs(delta), eps_); + } + + VLOG(MKLDNN_ALL) << "Restore dnn weights before comapre"; + restoreWgt(dnnWgts, parameters_[DNN]); +} + +void MKLDNNTester::saveWgt(const vector& from, + vector& to) { + const bool useGpu = false; + to.resize(from.size()); + for (size_t i = 0; i < to.size(); ++i) { + const VectorPtr& wgt = from[i]->getBuf(PARAMETER_VALUE); + to[i] = Vector::create(wgt->getSize(), useGpu); + to[i]->copyFrom(*wgt); + } +} + +void MKLDNNTester::restoreWgt(const vector& from, + vector& to) { + CHECK_EQ(from.size(), to.size()); + for (size_t i = 0; i < from.size(); ++i) { + const VectorPtr& wgt = to[i]->getBuf(PARAMETER_VALUE); + wgt->copyFrom(*from[i]); + } +} + +// clear parameters grad +void MKLDNNTester::clearWgtDiffs() { + for (size_t n = 0; n < parameters_.size(); ++n) { + for (size_t i = 0; i < parameters_[n].size(); ++i) { + const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT); + if (grad) { + grad->zeroMem(); + } + } + } +} + +void MKLDNNTester::clearBotDiffs() { + // dnn and ref + for (size_t n = 0; n < dataLayers_.size(); ++n) { + // all inputs layers + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + dataLayers_[n][i]->getOutputGrad()->zeroMem(); + } + } +} + +void MKLDNNTester::clearBotDiffs(int n) { + CHECK_LT(n, NUM); + // all inputs layers + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + dataLayers_[n][i]->getOutputGrad()->zeroMem(); + } +} + +void MKLDNNTester::clearTopDatas() { + for (size_t i = 0; i < testLayers_.size(); ++i) { + testLayers_[i]->getOutputValue()->zeroMem(); + } +} + +void MKLDNNTester::printTopDatas() { + if (!log_) { + return; + } + + for (int n = 0; n < NUM; ++n) { + VLOG(lvl_) << testLayers_[n]->getType() << " forward output TopData: "; + printMatrix(testLayers_[n]->getOutputValue()); + } +} + +void MKLDNNTester::printMatrix(const MatrixPtr& m) { + if (!log_) { + return; + } + + std::ostringstream ostr; + m->print(ostr); + VLOG(lvl_) << std::endl << ostr.str(); +} + +void MKLDNNTester::printVector(const VectorPtr& v) { + if (!log_) { + return; + } + + std::ostringstream ostr; + v->print(ostr, v->getSize()); + VLOG(lvl_) << std::endl << ostr.str(); +} + +double MKLDNNTester::getDelta(const real* d1, + const real* d2, + size_t len, + const float failRate, + const float thres) { + double delta = 0, sum = 0; + int failCnt = 0; + const double eps = 1e-5; + double maxOut = 0; + for (size_t i = 0; i < len; ++i) { + double ref = fabs(d2[i]); + double diff = fabs(d1[i] - d2[i]); + delta += diff; + sum += ref; + if (ref > eps && fabs(d1[i]) > eps && diff / ref > thres) { + maxOut = std::max(maxOut, diff / ref); + failCnt++; + } + } + EXPECT_TRUE(std::isnormal(sum)); + EXPECT_FALSE(std::isinf(sum)); + EXPECT_FALSE(std::isnan(delta)); + VLOG(MKLDNN_ALL) << "reference avg data: " << sum / len + << ", delta: " << delta / sum << ", failCnt:" << failCnt; + return (failCnt / (float)len) > failRate ? maxOut : delta / sum; +} + +double MKLDNNTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) { + CHECK_EQ(m1->getElementCnt(), m2->getElementCnt()); + return getDelta(m1->getData(), m2->getData(), m1->getElementCnt()); +} + +double MKLDNNTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) { + CHECK_EQ(v1->getSize(), v2->getSize()); + return getDelta(v1->getData(), v2->getData(), v1->getSize()); +} + +void MKLDNNTester::runOnce() { + // test forward + randomBotDatas(); + dnnLayer_->forward(PASS_TRAIN); + refLayer_->forward(PASS_TRAIN); + checkForward(); + + // test backward + randomTopDiffs(); + dnnLayer_->backward(nullptr); + refLayer_->backward(nullptr); + checkBackwardData(); + checkBackwardWgts(); + + // clear buffers + // ref code will addto the diff, dnn code will writeto it + // and clearTopDatas() and clearWgtDiffs() should be coverd by test layers + clearBotDiffs(REF); +} + +void MKLDNNTester::run(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize, + size_t inputImgH, + size_t inputImgW, + size_t iter, + float epsilon, + bool log, + int level) { + VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type() + << " vs " << ref.layerConfig.type(); + ih_ = inputImgH; + iw_ = inputImgW; + iter_ = iter; + eps_ = epsilon; + log_ = log; + lvl_ = level; + + // Firstly test FLAGS_use_mkldnn_wgt = false + FLAGS_use_mkldnn_wgt = false; + // reset and run once + reset(dnn, ref, batchSize); + randomWgtDatas(); + clearWgtDiffs(); + clearBotDiffs(); + for (size_t i = 0; i < iter_; ++i) { + VLOG(MKLDNN_TESTS) << "Check Iteration " << i; + runOnce(); + } + + // Then test FLAGS_use_mkldnn_wgt = true + FLAGS_use_mkldnn_wgt = true; + // after run once the mkldnn weight has been stored in dnnlayer + // then save the weights and restart again + vector dnnWgts, refWgts; + CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); + saveWgt(parameters_[DNN], dnnWgts); + saveWgt(parameters_[REF], refWgts); + + // restart again with flag true + reset(dnn, ref, batchSize); + + // restore wgt + restoreWgt(dnnWgts, parameters_[DNN]); + restoreWgt(refWgts, parameters_[REF]); + clearWgtDiffs(); + clearBotDiffs(); + + for (size_t i = 0; i < iter_; ++i) { + VLOG(MKLDNN_TESTS) << "Check Iteration " << i; + runOnce(); + } +} + +} // namespace paddle diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h new file mode 100644 index 0000000000000000000000000000000000000000..522eeaf24b1949abac057a1e59e9977610be23c0 --- /dev/null +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -0,0 +1,120 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include "LayerGradUtil.h" +#include "paddle/gserver/layers/MKLDNNBase.h" + +namespace paddle { + +/** + * @brief test the functionality of Mkldnnlayers + * refer to paddle original function + */ +class MKLDNNTester { + enum { + DNN = 0, // MKLDNN layer + REF = 1, // Reference layer + NUM = 2, // Number of total + }; + +protected: + std::vector configs_; + vector layerNames_; + vector> dataLayers_; + vector> datas_; + vector layerMaps_; + vector> parameters_; + vector testLayers_; + LayerPtr dnnLayer_, refLayer_; + + /// run some iterations, all the result should pass + size_t iter_; + /// whether to print out the details + bool log_; + /// vlog level to print the matrix details datas + int lvl_; + /// epsilon + float eps_; + /// input image size, default 1 + size_t ih_, iw_; + +public: + explicit MKLDNNTester(size_t iter = 3, float epsilon = 1e-4) { + iter_ = iter; + eps_ = epsilon; + log_ = false; + lvl_ = MKLDNN_ALL; + } + + ~MKLDNNTester() {} + +public: + void run(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize, + size_t inputImgH = 1, + size_t inputImgW = 1, + size_t iter = 3, + float epsilon = 1e-4, + bool log = false, + int level = MKLDNN_ALL); + void setLogLevel(int lvl) { lvl_ = lvl; } + +private: + void reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize); + void setInputImgSize(); + void runOnce(); + + void randomWgtDatas(); + void randomBotDatas(); + void randomTopDiffs(); + + void checkForward(); + void checkBackwardData(); + void checkBackwardWgts(); + + void clearWgtDiffs(); + void clearBotDiffs(); + void clearBotDiffs(int n); // clear specific layer + void clearTopDatas(); + + void printTopDatas(); + void printMatrix(const MatrixPtr& m); + void printVector(const VectorPtr& v); + + void saveWgt(const vector& from, vector& to); + void restoreWgt(const vector& from, vector& to); + + double compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2); + double compareVector(const VectorPtr& v1, const VectorPtr& v2); + + /** + * Get delta percent + * if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the + * max(diff/ref) + * else return sum(abs(a-b)) / sum(abs(b)) + * The return value should smaller than eps when passing. + */ + double getDelta(const real* d1, + const real* d2, + size_t len, + const float failRate = 1e-3, + const float thres = 0.1); +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e1d2270df24331914f3a51acc90a518084b3ce4e --- /dev/null +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -0,0 +1,76 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include "MKLDNNTester.h" +#include "ModelConfig.pb.h" + +using namespace paddle; // NOLINT + +DECLARE_bool(thread_local_rand_use_global_seed); +DECLARE_bool(use_gpu); +DECLARE_bool(use_mkldnn); + +struct testFCDesc { + int bs; + int ic; + int oc; + int ih, iw; // oh == ow == 1 +}; + +void testFcLayer(const testFCDesc& pm) { + const std::string compareTypes[] = {"mkldnn_fc", "fc"}; + TestConfig cfg; + cfg.layerConfig.set_type(compareTypes[0]); + cfg.layerConfig.set_size(pm.oc); + cfg.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), + /* size of weight= */ size_t(pm.oc * pm.ic * pm.ih * pm.iw)}); + cfg.layerConfig.add_inputs(); + + MKLDNNTester tester; + for (auto biasSize : {pm.oc, 0}) { + cfg.biasSize = biasSize; + TestConfig ref = cfg; + ref.layerConfig.set_type(compareTypes[1]); + for (auto bs : {pm.bs, 1}) { + tester.run(cfg, ref, bs, pm.ih, pm.iw); + } + } +} + +TEST(MKLDNNLayer, FcLayer) { + testFcLayer({/*bs*/ 2, /*ic*/ 2, /*oc*/ 3, /*ih*/ 1, /*iw*/ 1}); + testFcLayer({/*bs*/ 3, /*ic*/ 7, /*oc*/ 19, /*ih*/ 1, /*iw*/ 1}); + testFcLayer({/*bs*/ 8, /*ic*/ 16, /*oc*/ 32, /*ih*/ 13, /*iw*/ 13}); + testFcLayer({/*bs*/ 4, /*ic*/ 12, /*oc*/ 18, /*ih*/ 13, /*iw*/ 11}); + testFcLayer({/*bs*/ 2, /*ic*/ 64, /*oc*/ 32, /*ih*/ 16, /*iw*/ 16}); + testFcLayer({/*bs*/ 15, /*ic*/ 3, /*oc*/ 6, /*ih*/ 16, /*iw*/ 16}); +} + +// TODO(TJ): add branch test + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + FLAGS_use_gpu = false; + FLAGS_use_mkldnn = true; + initMain(argc, argv); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + return RUN_ALL_TESTS(); +} diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/trainer/TrainerConfigHelper.cpp index 133e2be104c6fbfddefd8698d2b6aa8315c56c70..eba40862b926cfe863c569e73a6a3ceabcf1f3b4 100644 --- a/paddle/trainer/TrainerConfigHelper.cpp +++ b/paddle/trainer/TrainerConfigHelper.cpp @@ -28,6 +28,8 @@ DECLARE_bool(with_cost); DECLARE_bool(with_gpu); DECLARE_bool(parallel_nn); DECLARE_string(config_args); +DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); const char *kConfigParserModuleName = "paddle.trainer.config_parser"; const char *kConfigParserFuncName = "parse_config_and_serialize"; @@ -44,6 +46,8 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath) configArgs << "trainer_id=" << FLAGS_trainer_id << ",local=" << FLAGS_local << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu << ",parallel_nn=" << FLAGS_parallel_nn + << ",use_mkldnn=" << FLAGS_use_mkldnn + << ",use_mkldnn_wgt=" << FLAGS_use_mkldnn_wgt << ",cudnn_version=" << hl_get_cudnn_lib_version(); if (!FLAGS_config_args.empty()) { configArgs << "," << FLAGS_config_args; diff --git a/paddle/utils/Flags.cpp b/paddle/utils/Flags.cpp index 320f671ed97dbadc4fa1b4b52d5611cf9239e7dd..600c83a8487191895de635dd8433f6c44e86ce77 100644 --- a/paddle/utils/Flags.cpp +++ b/paddle/utils/Flags.cpp @@ -20,6 +20,14 @@ DEFINE_bool(use_gpu, false, "Only support CPU training"); DEFINE_bool(use_gpu, true, "Whether to use GPU for training"); #endif +#ifdef PADDLE_USE_MKLDNN +// TODO(TJ): change to true when MKLDNN layers support multi-inputs +DEFINE_bool(use_mkldnn, false, "Default still keep use CPU training"); +#else +DEFINE_bool(use_mkldnn, false, "Only support CPU training"); +#endif + +DEFINE_bool(use_mkldnn_wgt, false, "Init weight from CPU weight"); DEFINE_bool(parallel_nn, false, "Whether to use multi-threads to calculate one neural network." diff --git a/paddle/utils/Flags.h b/paddle/utils/Flags.h index dc4faef8331ed47b9ce3e952389b6469cd9fda2e..0aca4c0ee036ee8490c0ceca7279df876dc21947 100644 --- a/paddle/utils/Flags.h +++ b/paddle/utils/Flags.h @@ -40,3 +40,5 @@ DECLARE_bool(show_layer_stat); DECLARE_string(predict_file); DECLARE_bool(prev_batch_state); DECLARE_string(init_model_path); +DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b7b696ef0c13e1bae2e910e08d1a1ea3e45cd5d5..da99e5bd53458aa0cb201a3525e28c66ab63c52d 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1604,6 +1604,8 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): @config_layer('fc') class FCLayer(LayerBase): + layer_type = 'fc' + def __init__(self, name, size, @@ -1611,14 +1613,27 @@ class FCLayer(LayerBase): bias=True, error_clipping_threshold=None, **xargs): - super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) + use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) + use_mkldnn_wgt = bool( + int(g_command_config_args.get("use_mkldnn_wgt", 0))) + if use_mkldnn: + self.layer_type = 'mkldnn_fc' + config_assert( + len(inputs) == 1, + "MkldnnFCLayer support one and only one input!") + super(FCLayer, self).__init__( + name, self.layer_type, size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) psize = self.config.size * input_layer.size dims = [input_layer.size, self.config.size] format = self.inputs[input_index].format sparse = format == "csr" or format == "csc" - + if use_mkldnn: + config_assert(not sparse, + "MkldnnFCLayer do not support sparse format yet") + if use_mkldnn_wgt: + dims = [self.config.size, input_layer.size] if sparse: psize = self.inputs[input_index].nnz else: @@ -1631,6 +1646,11 @@ class FCLayer(LayerBase): self.config.error_clipping_threshold = error_clipping_threshold +@config_layer('mkldnn_fc') +class MkldnnFcLayer(FCLayer): + layer_type = 'mkldnn_fc' + + @config_layer('selective_fc') class SelectiveFCLayer(LayerBase): def __init__(self,