“ba3b2eb3a5c288bd898d057a77682cecf043836c”上不存在“develop/doc/howto/usage/cluster/fabric_en.html”
提交 2e87d747 编写于 作者: Y Yiqun Liu 提交者: GitHub

Merge pull request #3337 from tensor-tang/merge

Enable mkldnn_fc for general format
...@@ -144,7 +144,7 @@ if(WITH_GPU) ...@@ -144,7 +144,7 @@ if(WITH_GPU)
endif(WITH_GPU) endif(WITH_GPU)
if(WITH_MKLDNN) if(WITH_MKLDNN)
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB}) list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB})
endif() endif()
if(USE_NNPACK) if(USE_NNPACK)
......
...@@ -23,6 +23,17 @@ endmacro() ...@@ -23,6 +23,17 @@ endmacro()
filter_test(GSERVER_HEADER) filter_test(GSERVER_HEADER)
filter_test(GSERVER_SOURCES) filter_test(GSERVER_SOURCES)
# MKLDNN layer/activation sources are compiled only when WITH_MKLDNN is set.
# Otherwise every MKLDNN*.h / MKLDNN*.cpp found below this directory is
# removed from the gserver header and source lists.
if(WITH_MKLDNN)
  message(STATUS "Compile with MKLDNNLayers and MKLDNNActivations")
else()
  file(GLOB_RECURSE DNN_HEADER
       RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
       "MKLDNN*.h")
  file(GLOB_RECURSE DNN_SOURCES
       RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
       "MKLDNN*.cpp")
  list(REMOVE_ITEM GSERVER_HEADER ${DNN_HEADER})
  list(REMOVE_ITEM GSERVER_SOURCES ${DNN_SOURCES})
  message(STATUS "Skip compiling with MKLDNNLayers and MKLDNNActivations")
endif()
if(NOT WITH_GPU) if(NOT WITH_GPU)
list(REMOVE_ITEM GSERVER_HEADER list(REMOVE_ITEM GSERVER_HEADER
layers/CudnnConvBaseLayer.h layers/CudnnConvBaseLayer.h
......
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "mkldnn.hpp"
namespace paddle {
// Verbosity levels used as the argument of VLOG() by the MKLDNN code.
// NOTE(review): MKLDNN_BASE and MKLDNN_TESTS share the value 1, so they are
// indistinguishable at runtime — presumably intentional, confirm.
typedef enum {
MKLDNN_BASE = 1, // basic info of MKLDNN
MKLDNN_TESTS = 1, // gtest info of MKLDNN
MKLDNN_SIZES = 2, // size info of MKLDNN
MKLDNN_FMTS = 3, // format info of MKLDNN
MKLDNN_ALL = 4, // show all info of MKLDNN
} MKLDNN_LOG_LEVEL;
/**
 * @brief Holder of the MKLDNN CPU engine.
 *
 * The only way to obtain an object is Instance(); the constructor and the
 * destructor are protected and copying/moving is deleted.
 */
class CPUEngine {
public:
  /// Returns the single shared instance, created on first call.
  static CPUEngine& Instance() {
    // Initialization of a function-local static is thread-safe in C++11.
    static CPUEngine instance;
    return instance;
  }

  // Neither copyable nor movable.
  CPUEngine(const CPUEngine&) = delete;
  CPUEngine& operator=(const CPUEngine&) = delete;
  CPUEngine(CPUEngine&&) = delete;
  CPUEngine& operator=(CPUEngine&&) = delete;

  /// Gives access to the wrapped mkldnn engine.
  mkldnn::engine& getEngine() { return cpuEngine_; }

protected:
  /// Creates the engine with kind mkldnn::engine::cpu and index 0.
  CPUEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {}
  ~CPUEngine() {}

private:
  mkldnn::engine cpuEngine_;
};
/**
 * @brief Owner of an mkldnn stream to which primitives are submitted.
 *
 * A fresh eager stream is created whenever the previous one has been used
 * by submit().
 */
class MKLDNNStream {
public:
  MKLDNNStream() : ready_(false) { resetState(); }
  virtual ~MKLDNNStream() {}

  /**
   * @brief Submit primitives to the stream
   * @param prims The primitives to submit
   * @param block Forwarded to wait() on the submitted stream
   */
  void submit(std::vector<mkldnn::primitive>& prims, bool block = true) {
    resetState();
    stream_->submit(prims).wait(block);
    // The stream has been consumed; the next resetState() recreates it.
    ready_ = false;
  }

  /**
   * @brief Recreate the mkldnn stream unless an unused one already exists
   */
  void resetState() {
    if (!ready_) {
      // TODO(TJ): change me when mkldnn have method to reset this state
      stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager));
      ready_ = true;
    }
  }

private:
  /// true while stream_ holds a stream that has not been submitted yet
  bool ready_;
  std::shared_ptr<mkldnn::stream> stream_;
};
} // namespace paddle
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNFcLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
using namespace mkldnn; // NOLINT
typedef memory::format format;
typedef inner_product_forward fc_fwd;
typedef inner_product_backward_weights fc_bwdWgt;
typedef inner_product_backward_data fc_bwdData;
namespace paddle {
REGISTER_LAYER(mkldnn_fc, MKLDNNFcLayer);
// Initializes the fc layer: runs the base-class init, validates that there is
// exactly one dense input, records the fixed input/output sizes and creates
// the weight and (optional) bias holders.
// Returns false when the base-class init fails, true otherwise.
bool MKLDNNFcLayer::init(const LayerMap& layerMap,
                         const ParameterMap& parameterMap) {
  const bool baseReady = MKLDNNLayer::init(layerMap, parameterMap);
  if (!baseReady) {
    return false;
  }

  CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet";
  CHECK_EQ(inputLayers_.size(), parameters_.size());
  CHECK(!parameters_[0]->isSparse()) << "Do not support sparse yet";

  // Output size is fixed: getSize() channels with a 1x1 frame.
  oc_ = getSize();
  oh_ = 1;
  ow_ = 1;

  // The input size of an fc layer is fixed as well.
  iLayerSize_ = inputLayers_[0]->getSize();
  CHECK_EQ(parameters_[0]->getSize(), iLayerSize_ * oc_);

  // Weight is held as an (oc_ x iLayerSize_) matrix over parameters_[0].
  weight_.reset(new Weight(oc_, iLayerSize_, parameters_[0], 0));

  // Bias exists only when a bias parameter was configured.
  if (biasParameter_.get() != NULL) {
    biases_.reset(new Weight(1, oc_, biasParameter_));
  }
  return true;
}
void MKLDNNFcLayer::convertWeightsFromPaddle() {
if (FLAGS_use_mkldnn_wgt) {
return;
}
if (hasInitedWgt_) {
return;
}
// The weight_ is transposed from initial paddle weight
MatrixPtr paddleWgt = Matrix::create(
weight_->getW()->getData(), iLayerSize_, oc_, false, false);
// TODO(TJ): remove this print when do not need differ weights
std::ostringstream ostr;
paddleWgt->print(ostr);
VLOG(MKLDNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str();
// The mkldnn weight is transposed from initial paddle matrix
MatrixPtr paddleWgtT;
paddleWgt->transpose(paddleWgtT, true);
weight_->getW()->copyFrom(*paddleWgtT);
hasInitedWgt_ = true;
}
void MKLDNNFcLayer::convertWeightsToPaddle() {
MatrixPtr dnnWgt = weight_->getW();
MatrixPtr paddleWgt;
dnnWgt->transpose(paddleWgt, true);
// copy paddle weight and override on weight_
MatrixPtr dnnWgtT = Matrix::create(
dnnWgt->getData(), dnnWgt->getWidth(), dnnWgt->getHeight(), false, false);
dnnWgtT->copyFrom(*paddleWgt);
}
void MKLDNNFcLayer::reshape() {
const Argument& input = getInput(0);
int batchSize = input.getBatchSize();
if (bs_ == batchSize) {
return;
}
bs_ = batchSize;
ih_ = input.getFrameHeight();
iw_ = input.getFrameWidth();
if (ih_ == 0) {
ih_ = 1;
}
if (iw_ == 0) {
iw_ = 1;
}
hasSpatial_ = true;
if (ih_ == 1 && iw_ == 1) {
hasSpatial_ = false;
}
CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize());
ic_ = iLayerSize_ / (ih_ * iw_);
CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible";
CHECK_EQ(size_t(oc_), getSize());
printSizeInfo();
// reset output
output_.setFrameHeight(oh_);
output_.setFrameWidth(ow_);
resetOutput(bs_, oc_);
// reset mkldnn forward
resetFwd();
needResetBwd_ = true;
convertWeightsFromPaddle();
}
// Rebuilds the mkldnn memories and the inner-product forward primitive for
// the current sizes (bs_, ic_, ih_, iw_, oc_) and stores the primitive as the
// only entry of pipelineFwd_.
void MKLDNNFcLayer::resetFwd() {
// bias is used only when a bias Weight exists and has a value buffer
bool hasBias = biases_ && biases_->getW();
real* iData = getInputValue(0)->getData();
real* oData = getOutputValue()->getData();
real* wData = weight_->getW()->getData();
real* bData = hasBias ? biases_->getW()->getData() : NULL;
// TODO(TJ): below create should be covered in MkldnnMatrix
// create memory desc
// input/weight use 4-D descriptors (nchw / oihw) when the input is spatial,
// otherwise 2-D ones (nc / oi); output is always 2-D (nc)
memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw)
: createMD({bs_, ic_}, format::nc);
memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw)
: createMD({oc_, ic_}, format::oi);
// without bias an empty descriptor is created; it is not passed to mkldnn below
memory::desc bMD = bData != NULL ? createMD({oc_}, format::x)
: createMD({}, format::format_undef);
memory::desc oMD = createMD({bs_, oc_}, format::nc);
// create memory primitive desc and memory self
// the memories wrap the existing paddle buffers (pointers passed in)
inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData));
outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData));
prop_kind pk = prop_kind::forward;
// pick the descriptor overload with or without bias
fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD)
: fc_fwd::desc(pk, iMD, wMD, oMD);
fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
if (bData != NULL) {
biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData));
fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_));
} else {
fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_));
}
// the forward pipeline consists of this single primitive
pipelineFwd_.clear();
pipelineFwd_.push_back(*fwd_);
}
// Rebuilds the backward primitives when needResetBwd_ is set.
// pipelineBwd_ always receives the backward-weights primitive; the
// backward-data primitive is appended only when the input has a gradient.
void MKLDNNFcLayer::resetBwd() {
if (!needResetBwd_) {
return;
}
needResetBwd_ = false;
// bias gradient is computed only when a bias Weight exists and has a grad buffer
bool hasBias = biases_ && biases_->getWGrad();
real* iData = getInputValue(0)->getData();
// iDiff stays NULL when the input layer has no gradient buffer
real* iDiff = getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL;
real* oDiff = getOutputGrad()->getData();
real* wDiff = weight_->getWGrad()->getData();
real* bDiff = hasBias ? biases_->getWGrad()->getData() : NULL;
/// backward weight
// create memory desc for backward memory
// same layout choice as in resetFwd(): 4-D when spatial, 2-D otherwise
memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw)
: createMD({bs_, ic_}, format::nc);
memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw)
: createMD({oc_, ic_}, format::oi);
memory::desc oMD = createMD({bs_, oc_}, format::nc);
memory::desc bMD = bDiff != NULL ? createMD({oc_}, format::x)
: createMD({}, format::format_undef);
if (inVal_) {
// update data
inVal_->set_data_handle(iData);
} else {
inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
}
// create memory primitive desc and memory self
wgtGrad_.reset(new memory(memory::primitive_desc(wMD, engine_), wDiff));
outGrad_.reset(new memory(memory::primitive_desc(oMD, engine_), oDiff));
// forward primitive desc (built without bias) is passed to both backward
// primitive descs below
fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, iMD, wMD, oMD);
fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
fc_bwdWgt::desc bwdWgtDesc = bDiff != NULL
? fc_bwdWgt::desc(iMD, wMD, bMD, oMD)
: fc_bwdWgt::desc(iMD, wMD, oMD);
fc_bwdWgt::primitive_desc bwdWgtPD =
fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD);
if (bDiff != NULL) {
biasGrad_.reset(new memory(memory::primitive_desc(bMD, engine_), bDiff));
bwdWgt_.reset(
new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_));
} else {
bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_));
}
pipelineBwd_.clear();
pipelineBwd_.push_back(*bwdWgt_);
/// backward data
// nothing more to do when the input does not need a gradient
if (iDiff == NULL) {
return;
}
fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(iMD, wMD, oMD);
fc_bwdData::primitive_desc bwdDataPD =
fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD);
inGrad_.reset(new memory(memory::primitive_desc(iMD, engine_), iDiff));
// wgtVal_ is created in resetFwd(), so forward must have been set up first
CHECK(wgtVal_) << "Should have weight memory";
bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_));
pipelineBwd_.push_back(*bwdData_);
}
// Forward pass: refresh sizes if needed, run the mkldnn forward pipeline on
// the current input buffer, then apply the activation.
void MKLDNNFcLayer::forward(PassType passType) {
  Layer::forward(passType);
  reshape();

  {
    REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());

    // The input buffer may have changed (e.g. when this layer follows a data
    // layer), so point the input memory at the current data first.
    real* currentInput = getInputValue(0)->getData();
    inVal_->set_data_handle(currentInput);

    stream_->submit(pipelineFwd_);
  }

  {
    REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
    forwardActivation();
  }
}
void MKLDNNFcLayer::backward(const UpdateCallback& callback) {
/* Do derivation */ {
REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
backwardActivation();
}
{
REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
resetBwd();
// update diff
real* oDiff = getOutputGrad()->getData();
outGrad_->set_data_handle(oDiff);
// just sumbmit backward pipeline
stream_->submit(pipelineBwd_);
}
{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
weight_->getParameterPtr()->incUpdate(callback);
if (biases_ && biases_->getWGrad()) {
biases_->getParameterPtr()->incUpdate(callback);
}
}
}
} // namespace paddle
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "MKLDNNLayer.h"
#include "mkldnn.hpp"
namespace paddle {
/**
* @brief A subclass of MKLDNNLayer: the fully connected (fc) layer.
*
* The config file api is mkldnn_fc
*/
class MKLDNNFcLayer : public MKLDNNLayer {
protected:
// input layer size, can not be changed after init
size_t iLayerSize_; // == ic * ih * iw
// whether the weight has already been converted from the paddle layout
bool hasInitedWgt_;
// whether the input layer has image size info (false only for a 1x1 frame)
bool hasSpatial_;
// fc weight and bias
std::unique_ptr<Weight> weight_;
std::unique_ptr<Weight> biases_;
public:
// Starts with no converted weight and assumes a spatial input until
// reshape() decides otherwise.
explicit MKLDNNFcLayer(const LayerConfig& config)
: MKLDNNLayer(config), hasInitedWgt_(false), hasSpatial_(true) {}
~MKLDNNFcLayer() {}
// Validates the configuration and creates weight/bias; returns false when
// the base-class init fails.
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
// Transposes the paddle weight into the layout used by this layer.
void convertWeightsFromPaddle() override;
// Transposes the weight of this layer back into the paddle layout.
void convertWeightsToPaddle() override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
protected:
/**
* reshape the input image sizes
* and reset output buffer size
* and reset mkldnn forward
*/
void reshape();
/**
* reset the forward primitive and memory
* only would be called when input size changes
*/
void resetFwd();
/**
* reset the backward primitive and memory for mkldnn fc
* only would be called when needed
*/
void resetBwd();
};
} // namespace paddle
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "Layer.h"
#include "MKLDNNBase.h"
#include "mkldnn.hpp"
DECLARE_bool(use_mkldnn);
DECLARE_bool(use_mkldnn_wgt);
namespace paddle {
class MKLDNNLayer;
typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr;
/**
 * @brief Base class of MKLDNNlayer.
 *
 * Holds the size information, the mkldnn engine/stream, the forward and
 * backward primitives with their pipelines, and the mkldnn memories shared
 * by the concrete MKLDNN layers.
 */
class MKLDNNLayer : public Layer {
protected:
  // batch size
  int bs_;
  // input image channel, height and width
  int ic_, ih_, iw_;
  // output image channel, height and width
  int oc_, oh_, ow_;

  // backward also needs a reset after the forward handle has been reset
  bool needResetBwd_;

  // mkldnn engine, stream and primitives
  mkldnn::engine engine_;
  std::shared_ptr<MKLDNNStream> stream_;
  std::shared_ptr<mkldnn::primitive> fwd_;
  std::shared_ptr<mkldnn::primitive> bwdWgt_;
  std::shared_ptr<mkldnn::primitive> bwdData_;
  std::vector<mkldnn::primitive> pipelineFwd_;
  std::vector<mkldnn::primitive> pipelineBwd_;

  // TODO(TJ): change below memory as MKLDNNMatrixPtr type
  std::shared_ptr<mkldnn::memory> inVal_;
  std::shared_ptr<mkldnn::memory> inGrad_;
  std::shared_ptr<mkldnn::memory> outVal_;
  std::shared_ptr<mkldnn::memory> outGrad_;
  std::shared_ptr<mkldnn::memory> wgtVal_;
  std::shared_ptr<mkldnn::memory> wgtGrad_;
  std::shared_ptr<mkldnn::memory> biasVal_;
  std::shared_ptr<mkldnn::memory> biasGrad_;

public:
  /// All sizes start at 0 and no stream or primitive exists until init().
  explicit MKLDNNLayer(const LayerConfig& config)
      : Layer(config),
        bs_(0),
        ic_(0),
        ih_(0),
        iw_(0),
        oc_(0),
        oh_(0),
        ow_(0),
        needResetBwd_(true),
        engine_(mkldnn::engine::cpu, 0),
        stream_(nullptr),
        fwd_(nullptr),
        bwdWgt_(nullptr),
        bwdData_(nullptr) {}

  ~MKLDNNLayer() {}

  /**
   * Runs Layer::init, requires FLAGS_use_mkldnn, then creates the stream and
   * takes the engine from CPUEngine.
   * @return false when Layer::init fails, true otherwise
   */
  virtual bool init(const LayerMap& layerMap,
                    const ParameterMap& parameterMap) {
    if (!Layer::init(layerMap, parameterMap)) {
      return false;
    }

    // The three pieces are streamed back to back, so the first one needs a
    // trailing space to keep the sentences apart in the emitted message.
    CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn. "
                            << "Please set WITH_MKLDNN=ON "
                            << "and set use_mkldnn=True";
    stream_.reset(new MKLDNNStream());
    engine_ = CPUEngine::Instance().getEngine();

    // TODO(TJ): deviceId
    return true;
  }

  /**
   * convert weight from paddle format to mkldnn format
   * weight_ will be overridden
   */
  virtual void convertWeightsFromPaddle() {}

  /**
   * convert mkldnn weight to paddle format
   * weight_ will be overridden
   */
  virtual void convertWeightsToPaddle() {}

  /**
   * print info about sizes
   */
  virtual void printSizeInfo() {
    VLOG(MKLDNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_
                       << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_
                       << ", oh: " << oh_ << ", ow: " << ow_;
  }

  // TODO(TJ): move to MkldnnMatrix
  /**
   * create a memory desc from dims, format and data type (f32 by default)
   */
  inline mkldnn::memory::desc createMD(
      mkldnn::memory::dims dims,
      mkldnn::memory::format fmt,
      mkldnn::memory::data_type type = mkldnn::memory::data_type::f32) {
    // TODO(TJ): isFmtSuppoted(fmt)
    return mkldnn::memory::desc(dims, type, fmt);
  }
};
} // namespace paddle
...@@ -18,6 +18,15 @@ add_unittest_without_exec(test_LayerGrad ...@@ -18,6 +18,15 @@ add_unittest_without_exec(test_LayerGrad
add_test(NAME test_LayerGrad add_test(NAME test_LayerGrad
COMMAND test_LayerGrad) COMMAND test_LayerGrad)
########## test_Mkldnn layers and activations ##########
# The MKLDNN unit test is built and registered only when WITH_MKLDNN is set.
if(WITH_MKLDNN)
add_unittest_without_exec(test_MKLDNN
test_MKLDNN.cpp
MKLDNNTester.cpp
LayerGradUtil.cpp)
# Registered separately because add_unittest_without_exec is not followed by
# an automatic add_test here (NOTE(review): inferred from the name — confirm).
add_test(NAME test_MKLDNN COMMAND test_MKLDNN)
endif()
################ test_CRFLayerGrad #################### ################ test_CRFLayerGrad ####################
add_unittest_without_exec(test_CRFLayerGrad add_unittest_without_exec(test_CRFLayerGrad
test_CRFLayerGrad.cpp test_CRFLayerGrad.cpp
......
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNTester.h"
#include "paddle/gserver/layers/MKLDNNBase.h"
#include "paddle/gserver/layers/MKLDNNLayer.h"
namespace paddle {
// Rebuilds the data layers and the layers under test for both the mkldnn
// (DNN) and the reference (REF) configuration.
void MKLDNNTester::reset(const TestConfig& dnn,
                         const TestConfig& ref,
                         size_t batchSize) {
  const bool trans = false;
  const bool useGpu = false;

  // Drop everything from a previous run and size each container to NUM.
  configs_.clear();
  configs_.resize(NUM);
  layerNames_.clear();
  layerNames_.resize(NUM);
  dataLayers_.clear();
  dataLayers_.resize(NUM);
  datas_.clear();
  datas_.resize(NUM);
  layerMaps_.clear();
  layerMaps_.resize(NUM);
  parameters_.clear();
  parameters_.resize(NUM);
  testLayers_.clear();
  testLayers_.resize(NUM);

  // Slot DNN holds the mkldnn layer, slot REF the reference layer.
  configs_[DNN] = dnn;
  layerNames_[DNN] = "mkldnn";
  configs_[REF] = ref;
  layerNames_[REF] = "reference";

  // Create the input data layers and the tested layer of each slot.
  for (size_t slot = 0; slot < NUM; ++slot) {
    configs_[slot].layerConfig.set_name(layerNames_[slot]);
    initDataLayer(configs_[slot],
                  &(dataLayers_[slot]),
                  &(datas_[slot]),
                  &(layerMaps_[slot]),
                  layerNames_[slot],
                  batchSize,
                  trans,
                  useGpu);
    initTestLayer(configs_[slot],
                  &(layerMaps_[slot]),
                  &(parameters_[slot]),
                  &(testLayers_[slot]));
  }

  dnnLayer_ = testLayers_[DNN];
  refLayer_ = testLayers_[REF];

  // Both sides must have the same number of inputs and parameters.
  EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size());
  EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size());

  setInputImgSize();
}
// Stamps the configured frame height/width (ih_, iw_) onto the output of
// every data layer of both the mkldnn and the reference side.
void MKLDNNTester::setInputImgSize() {
  for (auto& sideLayers : dataLayers_) {
    for (auto& dataLayer : sideLayers) {
      // TODO(TJ): fix me when concat and elewise ready
      dataLayer->getOutput().setFrameHeight(ih_);
      dataLayer->getOutput().setFrameWidth(iw_);
    }
  }
}
// Randomizes every reference parameter and copies its value buffer into the
// matching mkldnn parameter, so both sides start from identical weights.
void MKLDNNTester::randomWgtDatas() {
  EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size());
  const size_t paramCount = parameters_[REF].size();
  for (size_t idx = 0; idx < paramCount; ++idx) {
    const VectorPtr& dnnValue = parameters_[DNN][idx]->getBuf(PARAMETER_VALUE);
    const VectorPtr& refValue = parameters_[REF][idx]->getBuf(PARAMETER_VALUE);

    parameters_[REF][idx]->randomize();
    dnnValue->copyFrom(*refValue);

    VLOG(lvl_) << "Random weight data " << parameters_[DNN][idx]->getName();
    printVector(dnnValue);
  }
}
// Fills every reference input with uniform random data and copies the same
// data into the matching mkldnn input.
void MKLDNNTester::randomBotDatas() {
  CHECK_EQ(dataLayers_.size(), NUM);
  const size_t inputCount = dataLayers_[DNN].size();
  for (size_t idx = 0; idx < inputCount; ++idx) {
    dataLayers_[REF][idx]->getOutputValue()->randomizeUniform();
    dataLayers_[DNN][idx]->getOutputValue()->copyFrom(
        *(dataLayers_[REF][idx]->getOutputValue()));

    VLOG(lvl_) << "Input " << idx << " data:";
    printMatrix(dataLayers_[REF][idx]->getOutputValue());
  }
}
// Fills the reference output gradient with uniform random data and copies it
// to the mkldnn layer, so both backward passes get the same input.
void MKLDNNTester::randomTopDiffs() {
  refLayer_->getOutputGrad()->randomizeUniform();
  dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad()));

  VLOG(lvl_) << "Random dom Backward Input, TopDiff: ";
  printMatrix(refLayer_->getOutputGrad());
}
void MKLDNNTester::checkForward() {
printTopDatas();
double delta = compareMatrix(testLayers_[DNN]->getOutputValue(),
testLayers_[REF]->getOutputValue());
VLOG(MKLDNN_ALL) << "Check Forward";
EXPECT_LE(fabs(delta), eps_);
}
// Compares, input by input, the gradient written to the data layers by the
// mkldnn layer against the one written by the reference layer.
void MKLDNNTester::checkBackwardData() {
  // TODO(TJ): when batch norm is ready, only the first input should be
  // checked for type "mkldnn_batch_norm" (the other two inputs are for
  // moving mean and var).
  const size_t inputCount = dataLayers_[DNN].size();
  for (size_t idx = 0; idx < inputCount; ++idx) {
    const MatrixPtr& dnnDiff = dataLayers_[DNN][idx]->getOutputGrad();
    const MatrixPtr& refDiff = dataLayers_[REF][idx]->getOutputGrad();

    VLOG(lvl_) << "Mkldnn Backward Output BotDiff " << idx;
    printMatrix(dnnDiff);
    VLOG(lvl_) << "Reference Backward Output BotDiff " << idx;
    printMatrix(refDiff);

    double delta = compareMatrix(dnnDiff, refDiff);
    EXPECT_LE(fabs(delta), eps_);
  }
}
void MKLDNNTester::checkBackwardWgts() {
CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size());
vector<VectorPtr> dnnWgts; // used to temply save mkldnn weights
saveWgt(parameters_[DNN], dnnWgts);
const MKLDNNLayerPtr dnnlayer =
std::dynamic_pointer_cast<MKLDNNLayer>(dnnLayer_);
CHECK(dnnlayer);
dnnlayer->convertWeightsToPaddle();
for (size_t i = 0; i < parameters_[DNN].size(); ++i) {
const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE);
const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE);
VLOG(lvl_) << "Mkldnn Output weight " << parameters_[DNN][i]->getName();
printVector(dnn);
VLOG(lvl_) << "Reference Output weight " << parameters_[REF][i]->getName();
printVector(ref);
double delta = compareVector(dnn, ref);
EXPECT_LE(fabs(delta), eps_);
}
VLOG(MKLDNN_ALL) << "Restore dnn weights before comapre";
restoreWgt(dnnWgts, parameters_[DNN]);
}
void MKLDNNTester::saveWgt(const vector<ParameterPtr>& from,
vector<VectorPtr>& to) {
const bool useGpu = false;
to.resize(from.size());
for (size_t i = 0; i < to.size(); ++i) {
const VectorPtr& wgt = from[i]->getBuf(PARAMETER_VALUE);
to[i] = Vector::create(wgt->getSize(), useGpu);
to[i]->copyFrom(*wgt);
}
}
void MKLDNNTester::restoreWgt(const vector<VectorPtr>& from,
vector<ParameterPtr>& to) {
CHECK_EQ(from.size(), to.size());
for (size_t i = 0; i < from.size(); ++i) {
const VectorPtr& wgt = to[i]->getBuf(PARAMETER_VALUE);
wgt->copyFrom(*from[i]);
}
}
// Zeroes the gradient buffer of every parameter on both sides; parameters
// without a gradient buffer are skipped.
void MKLDNNTester::clearWgtDiffs() {
  for (const auto& sideParams : parameters_) {
    for (const auto& param : sideParams) {
      const VectorPtr& grad = param->getBuf(PARAMETER_GRADIENT);
      if (!grad) {
        continue;
      }
      grad->zeroMem();
    }
  }
}
// Zeroes the output gradient of every input data layer, for both the mkldnn
// and the reference side.
void MKLDNNTester::clearBotDiffs() {
  for (const auto& sideLayers : dataLayers_) {
    for (const auto& dataLayer : sideLayers) {
      dataLayer->getOutputGrad()->zeroMem();
    }
  }
}
void MKLDNNTester::clearBotDiffs(int n) {
CHECK_LT(n, NUM);
// all inputs layers
for (size_t i = 0; i < dataLayers_[n].size(); ++i) {
dataLayers_[n][i]->getOutputGrad()->zeroMem();
}
}
// Zeroes the output value of every layer under test.
void MKLDNNTester::clearTopDatas() {
  for (const auto& layer : testLayers_) {
    layer->getOutputValue()->zeroMem();
  }
}
// Logs the forward output of both tested layers; silent unless log_ is set.
void MKLDNNTester::printTopDatas() {
  if (log_) {
    for (int side = 0; side < NUM; ++side) {
      VLOG(lvl_) << testLayers_[side]->getType()
                 << " forward output TopData: ";
      printMatrix(testLayers_[side]->getOutputValue());
    }
  }
}
// Logs the content of a matrix at level lvl_; silent unless log_ is set.
void MKLDNNTester::printMatrix(const MatrixPtr& m) {
  if (log_) {
    std::ostringstream text;
    m->print(text);
    VLOG(lvl_) << std::endl << text.str();
  }
}
// Logs all elements of a vector at level lvl_; silent unless log_ is set.
void MKLDNNTester::printVector(const VectorPtr& v) {
  if (log_) {
    std::ostringstream text;
    v->print(text, v->getSize());
    VLOG(lvl_) << std::endl << text.str();
  }
}
// Measures how far d1 is from the reference d2 over len elements.
//
// An element counts as failed when both values are above 1e-5 in magnitude
// and its relative difference exceeds thres. If the share of failed elements
// is above failRate, the largest relative difference is returned; otherwise
// the result is sum(|d1 - d2|) / sum(|d2|).
double MKLDNNTester::getDelta(const real* d1,
                              const real* d2,
                              size_t len,
                              const float failRate,
                              const float thres) {
  const double eps = 1e-5;
  double delta = 0;
  double sum = 0;
  double maxOut = 0;
  int failCnt = 0;

  for (size_t idx = 0; idx < len; ++idx) {
    double ref = fabs(d2[idx]);
    double diff = fabs(d1[idx] - d2[idx]);
    delta += diff;
    sum += ref;

    // Near-zero values are excluded from the relative check.
    const bool comparable = ref > eps && fabs(d1[idx]) > eps;
    if (comparable && diff / ref > thres) {
      maxOut = std::max(maxOut, diff / ref);
      failCnt++;
    }
  }

  EXPECT_TRUE(std::isnormal(sum));
  EXPECT_FALSE(std::isinf(sum));
  EXPECT_FALSE(std::isnan(delta));
  VLOG(MKLDNN_ALL) << "reference avg data: " << sum / len
                   << ", delta: " << delta / sum << ", failCnt:" << failCnt;

  if ((failCnt / (float)len) > failRate) {
    return maxOut;
  }
  return delta / sum;
}
double MKLDNNTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) {
CHECK_EQ(m1->getElementCnt(), m2->getElementCnt());
return getDelta(m1->getData(), m2->getData(), m1->getElementCnt());
}
double MKLDNNTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) {
CHECK_EQ(v1->getSize(), v2->getSize());
return getDelta(v1->getData(), v2->getData(), v1->getSize());
}
// One full check: forward on fresh random inputs, then backward on fresh
// random output gradients, comparing mkldnn against the reference each time.
void MKLDNNTester::runOnce() {
  // Forward with identical random inputs on both sides.
  randomBotDatas();
  dnnLayer_->forward(PASS_TRAIN);
  refLayer_->forward(PASS_TRAIN);
  checkForward();

  // Backward with identical random output gradients on both sides.
  randomTopDiffs();
  dnnLayer_->backward(nullptr);
  refLayer_->backward(nullptr);
  checkBackwardData();
  checkBackwardWgts();

  // The reference code adds to the input gradient while the dnn code
  // overwrites it, so only the reference side has to be cleared here.
  // clearTopDatas() and clearWgtDiffs() should be covered by the test layers.
  clearBotDiffs(REF);
}
void MKLDNNTester::run(const TestConfig& dnn,
const TestConfig& ref,
size_t batchSize,
size_t inputImgH,
size_t inputImgW,
size_t iter,
float epsilon,
bool log,
int level) {
VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type()
<< " vs " << ref.layerConfig.type();
ih_ = inputImgH;
iw_ = inputImgW;
iter_ = iter;
eps_ = epsilon;
log_ = log;
lvl_ = level;
// Firstly test FLAGS_use_mkldnn_wgt = false
FLAGS_use_mkldnn_wgt = false;
// reset and run once
reset(dnn, ref, batchSize);
randomWgtDatas();
clearWgtDiffs();
clearBotDiffs();
for (size_t i = 0; i < iter_; ++i) {
VLOG(MKLDNN_TESTS) << "Check Iteration " << i;
runOnce();
}
// Then test FLAGS_use_mkldnn_wgt = true
FLAGS_use_mkldnn_wgt = true;
// after run once the mkldnn weight has been stored in dnnlayer
// then save the weights and restart again
vector<VectorPtr> dnnWgts, refWgts;
CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size());
saveWgt(parameters_[DNN], dnnWgts);
saveWgt(parameters_[REF], refWgts);
// restart again with flag true
reset(dnn, ref, batchSize);
// restore wgt
restoreWgt(dnnWgts, parameters_[DNN]);
restoreWgt(refWgts, parameters_[REF]);
clearWgtDiffs();
clearBotDiffs();
for (size_t i = 0; i < iter_; ++i) {
VLOG(MKLDNN_TESTS) << "Check Iteration " << i;
runOnce();
}
}
} // namespace paddle
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/gserver/layers/MKLDNNBase.h"
namespace paddle {
/**
* @brief test the functionality of Mkldnnlayers
* refer to paddle original function
*/
class MKLDNNTester {
enum {
DNN = 0, // MKLDNN layer
REF = 1, // Reference layer
NUM = 2, // Number of total
};
protected:
std::vector<TestConfig> configs_;
vector<string> layerNames_;
vector<vector<DataLayerPtr>> dataLayers_;
vector<vector<Argument>> datas_;
vector<LayerMap> layerMaps_;
vector<vector<ParameterPtr>> parameters_;
vector<LayerPtr> testLayers_;
LayerPtr dnnLayer_, refLayer_;
/// run some iterations, all the result should pass
size_t iter_;
/// whether to print out the details
bool log_;
/// vlog level to print the matrix details datas
int lvl_;
/// epsilon
float eps_;
/// input image size, default 1
size_t ih_, iw_;
public:
explicit MKLDNNTester(size_t iter = 3, float epsilon = 1e-4) {
iter_ = iter;
eps_ = epsilon;
log_ = false;
lvl_ = MKLDNN_ALL;
}
~MKLDNNTester() {}
public:
void run(const TestConfig& dnn,
const TestConfig& ref,
size_t batchSize,
size_t inputImgH = 1,
size_t inputImgW = 1,
size_t iter = 3,
float epsilon = 1e-4,
bool log = false,
int level = MKLDNN_ALL);
void setLogLevel(int lvl) { lvl_ = lvl; }
private:
void reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize);
void setInputImgSize();
void runOnce();
void randomWgtDatas();
void randomBotDatas();
void randomTopDiffs();
void checkForward();
void checkBackwardData();
void checkBackwardWgts();
void clearWgtDiffs();
void clearBotDiffs();
void clearBotDiffs(int n); // clear specific layer
void clearTopDatas();
void printTopDatas();
void printMatrix(const MatrixPtr& m);
void printVector(const VectorPtr& v);
void saveWgt(const vector<ParameterPtr>& from, vector<VectorPtr>& to);
void restoreWgt(const vector<VectorPtr>& from, vector<ParameterPtr>& to);
double compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2);
double compareVector(const VectorPtr& v1, const VectorPtr& v2);
/**
* Get delta percent
* if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the
* max(diff/ref)
* else return sum(abs(a-b)) / sum(abs(b))
* The return value should smaller than eps when passing.
*/
double getDelta(const real* d1,
const real* d2,
size_t len,
const float failRate = 1e-3,
const float thres = 0.1);
};
} // namespace paddle
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "MKLDNNTester.h"
#include "ModelConfig.pb.h"
using namespace paddle; // NOLINT
DECLARE_bool(thread_local_rand_use_global_seed);
DECLARE_bool(use_gpu);
DECLARE_bool(use_mkldnn);
// Sizes of one fc test case.
struct testFCDesc {
// batch size
int bs;
// input channels
int ic;
// output channels (the size of the fc layer)
int oc;
// input frame height and width
int ih, iw; // oh == ow == 1
};
// Compares the "mkldnn_fc" layer against the "fc" layer for the given sizes,
// with and without bias, and for batch size pm.bs as well as batch size 1.
void testFcLayer(const testFCDesc& pm) {
  const std::string dnnType = "mkldnn_fc";
  const std::string refType = "fc";

  const size_t inputSize = size_t(pm.ic * pm.ih * pm.iw);
  const size_t weightSize = size_t(pm.oc * pm.ic * pm.ih * pm.iw);

  TestConfig cfg;
  cfg.layerConfig.set_type(dnnType);
  cfg.layerConfig.set_size(pm.oc);
  cfg.inputDefs.push_back({INPUT_DATA, "layer_0", inputSize, weightSize});
  cfg.layerConfig.add_inputs();

  MKLDNNTester tester;
  for (auto biasSize : {pm.oc, 0}) {
    cfg.biasSize = biasSize;

    // The reference config differs from the mkldnn one only in its type.
    TestConfig ref = cfg;
    ref.layerConfig.set_type(refType);

    for (auto bs : {pm.bs, 1}) {
      tester.run(cfg, ref, bs, pm.ih, pm.iw);
    }
  }
}
// Checks the MKLDNN fully-connected layer over a table of shapes.
TEST(MKLDNNLayer, FcLayer) {
  // Columns: bs, ic, oc, ih, iw
  const testFCDesc shapes[] = {
      {2, 2, 3, 1, 1},
      {3, 7, 19, 1, 1},
      {8, 16, 32, 13, 13},
      {4, 12, 18, 13, 11},
      {2, 64, 32, 16, 16},
      {15, 3, 6, 16, 16},
  };
  for (const testFCDesc& shape : shapes) {
    testFcLayer(shape);
  }
}
// TODO(TJ): add branch test
// Test entry point: initializes GoogleTest, forces the CPU + MKLDNN flags,
// calls initMain, fixes the random seed and runs every registered test.
// Returns the result of RUN_ALL_TESTS().
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  // These flags are assigned before initMain is called.
  // NOTE(review): presumably initMain parses the command line and may
  // override them -- confirm before reordering.
  FLAGS_use_gpu = false;
  FLAGS_use_mkldnn = true;
  initMain(argc, argv);
  // Assigned after initMain, so this value is not changed by that call.
  FLAGS_thread_local_rand_use_global_seed = true;
  // Fixed seed for the C library random generator.
  srand(1);
  return RUN_ALL_TESTS();
}
...@@ -28,6 +28,8 @@ DECLARE_bool(with_cost); ...@@ -28,6 +28,8 @@ DECLARE_bool(with_cost);
DECLARE_bool(with_gpu); DECLARE_bool(with_gpu);
DECLARE_bool(parallel_nn); DECLARE_bool(parallel_nn);
DECLARE_string(config_args); DECLARE_string(config_args);
DECLARE_bool(use_mkldnn);
DECLARE_bool(use_mkldnn_wgt);
const char *kConfigParserModuleName = "paddle.trainer.config_parser"; const char *kConfigParserModuleName = "paddle.trainer.config_parser";
const char *kConfigParserFuncName = "parse_config_and_serialize"; const char *kConfigParserFuncName = "parse_config_and_serialize";
...@@ -44,6 +46,8 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath) ...@@ -44,6 +46,8 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath)
configArgs << "trainer_id=" << FLAGS_trainer_id << ",local=" << FLAGS_local configArgs << "trainer_id=" << FLAGS_trainer_id << ",local=" << FLAGS_local
<< ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu
<< ",parallel_nn=" << FLAGS_parallel_nn << ",parallel_nn=" << FLAGS_parallel_nn
<< ",use_mkldnn=" << FLAGS_use_mkldnn
<< ",use_mkldnn_wgt=" << FLAGS_use_mkldnn_wgt
<< ",cudnn_version=" << hl_get_cudnn_lib_version(); << ",cudnn_version=" << hl_get_cudnn_lib_version();
if (!FLAGS_config_args.empty()) { if (!FLAGS_config_args.empty()) {
configArgs << "," << FLAGS_config_args; configArgs << "," << FLAGS_config_args;
......
...@@ -20,6 +20,14 @@ DEFINE_bool(use_gpu, false, "Only support CPU training"); ...@@ -20,6 +20,14 @@ DEFINE_bool(use_gpu, false, "Only support CPU training");
DEFINE_bool(use_gpu, true, "Whether to use GPU for training"); DEFINE_bool(use_gpu, true, "Whether to use GPU for training");
#endif #endif
// use_mkldnn defaults to false in both build configurations; only the help
// text differs between the two branches.
#ifdef PADDLE_USE_MKLDNN
// TODO(TJ): change to true when MKLDNN layers support multi-inputs
DEFINE_bool(use_mkldnn, false, "Default still keep use CPU training");
#else
DEFINE_bool(use_mkldnn, false, "Only support CPU training");
#endif
// use_mkldnn_wgt is defined unconditionally and also defaults to false.
DEFINE_bool(use_mkldnn_wgt, false, "Init weight from CPU weight");
DEFINE_bool(parallel_nn, DEFINE_bool(parallel_nn,
false, false,
"Whether to use multi-threads to calculate one neural network." "Whether to use multi-threads to calculate one neural network."
......
...@@ -40,3 +40,5 @@ DECLARE_bool(show_layer_stat); ...@@ -40,3 +40,5 @@ DECLARE_bool(show_layer_stat);
DECLARE_string(predict_file); DECLARE_string(predict_file);
DECLARE_bool(prev_batch_state); DECLARE_bool(prev_batch_state);
DECLARE_string(init_model_path); DECLARE_string(init_model_path);
DECLARE_bool(use_mkldnn);
DECLARE_bool(use_mkldnn_wgt);
...@@ -1604,6 +1604,8 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): ...@@ -1604,6 +1604,8 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase):
@config_layer('fc') @config_layer('fc')
class FCLayer(LayerBase): class FCLayer(LayerBase):
layer_type = 'fc'
def __init__(self, def __init__(self,
name, name,
size, size,
...@@ -1611,14 +1613,27 @@ class FCLayer(LayerBase): ...@@ -1611,14 +1613,27 @@ class FCLayer(LayerBase):
bias=True, bias=True,
error_clipping_threshold=None, error_clipping_threshold=None,
**xargs): **xargs):
super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0)))
use_mkldnn_wgt = bool(
int(g_command_config_args.get("use_mkldnn_wgt", 0)))
if use_mkldnn:
self.layer_type = 'mkldnn_fc'
config_assert(
len(inputs) == 1,
"MkldnnFCLayer support one and only one input!")
super(FCLayer, self).__init__(
name, self.layer_type, size, inputs=inputs, **xargs)
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
psize = self.config.size * input_layer.size psize = self.config.size * input_layer.size
dims = [input_layer.size, self.config.size] dims = [input_layer.size, self.config.size]
format = self.inputs[input_index].format format = self.inputs[input_index].format
sparse = format == "csr" or format == "csc" sparse = format == "csr" or format == "csc"
if use_mkldnn:
config_assert(not sparse,
"MkldnnFCLayer do not support sparse format yet")
if use_mkldnn_wgt:
dims = [self.config.size, input_layer.size]
if sparse: if sparse:
psize = self.inputs[input_index].nnz psize = self.inputs[input_index].nnz
else: else:
...@@ -1631,6 +1646,11 @@ class FCLayer(LayerBase): ...@@ -1631,6 +1646,11 @@ class FCLayer(LayerBase):
self.config.error_clipping_threshold = error_clipping_threshold self.config.error_clipping_threshold = error_clipping_threshold
# FC layer variant whose layer_type is fixed to 'mkldnn_fc'; everything else
# is inherited unchanged from FCLayer.
@config_layer('mkldnn_fc')
class MkldnnFcLayer(FCLayer):
    layer_type = 'mkldnn_fc'
@config_layer('selective_fc') @config_layer('selective_fc')
class SelectiveFCLayer(LayerBase): class SelectiveFCLayer(LayerBase):
def __init__(self, def __init__(self,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册