diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f8c06c5f868f8d48a9a222b92315ee0ef2cf265e --- /dev/null +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -0,0 +1,543 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNConvLayer.h" +#include "paddle/math/MathUtils.h" +#include "paddle/utils/Logging.h" + +using namespace mkldnn; // NOLINT +typedef memory::format format; + +namespace paddle { + +REGISTER_LAYER(mkldnn_conv, MKLDNNConvLayer); + +bool MKLDNNConvLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!MKLDNNLayer::init(layerMap, parameterMap)) { + return false; + } + CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet"; + CHECK_EQ(inputLayers_.size(), parameters_.size()); + CHECK(config_.shared_biases()) << "Only support shared biases yet"; + + oc_ = config_.num_filters(); + const ConvConfig& conf = config_.inputs(0).conv_conf(); + ic_ = conf.channels(); + fw_ = conf.filter_size(); + fh_ = conf.filter_size_y(); + pw_ = conf.padding(); + ph_ = conf.padding_y(); + dw_ = conf.dilation(); + dh_ = conf.dilation_y(); + sw_ = conf.stride(); + sh_ = conf.stride_y(); + gp_ = conf.groups(); + oh_ = conf.output_y(); + ow_ = conf.output_x(); + ih_ = conf.img_size_y(); + iw_ = conf.img_size(); + caffeMode_ = conf.caffe_mode(); + CHECK(caffeMode_) << "Only support caffe mode yet"; + CHECK(dh_ == 1 && dw_ == 1) << "Only support dilation 1 yet"; + // check group setting + CHECK_EQ((oc_ / gp_) * gp_, oc_) << "group is indivisible for oc"; + CHECK_EQ((ic_ / gp_) * gp_, ic_) << "group is indivisible for ic"; + + // create weight + size_t height = oc_ / gp_; + size_t width = ic_ * fh_ * fw_; + CHECK_EQ(parameters_[0]->getSize(), height * width); + weight_ = + std::unique_ptr(new Weight(height, width, parameters_[0], 0)); + + // create biases + if (biasParameter_.get() != NULL) { + biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_)); + } + return true; +} + +void MKLDNNConvLayer::convertWeightsFromPaddle() { + if (hasInitedWgt_) { + return; + } + + CHECK(wgtVal_) << "should have been initialized"; + // the paddle weight format is oihw or goihw + auto targetDim = wgtVal_->getDims(); + auto srcFmt = (gp_ == 1) ? memory::format::oihw : memory::format::goihw; + wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim); + hasInitedWgt_ = true; +} + +void MKLDNNConvLayer::convertWeightsToPaddle() { + CHECK(wgtVal_) << "should have been initialized"; + auto targetDim = wgtVal_->getDims(); + auto dstFmt = (gp_ == 1) ? memory::format::oihw : memory::format::goihw; + wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); +} + +void MKLDNNConvLayer::reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + reshapeInput(bs, ih, iw); + + // cal output sizes + // oc can not be changed + int fh = (fh_ - 1) * dh_ + 1; + int fw = (fw_ - 1) * dw_ + 1; + oh = outputSize(ih, fh, ph_, sh_, caffeMode_); + ow = outputSize(iw, fw, pw_, sw_, caffeMode_); + + reshapeOutput(oh, ow); + resizeOutput(bs, oc * oh * ow); + + printSizeInfo(); +} + +void MKLDNNConvLayer::resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + resetFwdPD(fwdPD_); + + resetFwdBuffers(fwdPD_, in, wgt, bias, out); + + resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + + printValueFormatFlow(); +} + +void MKLDNNConvLayer::resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + std::shared_ptr bwdWgtPD; + std::shared_ptr bwdDataPD; + + resetBwdWgtPD(bwdWgtPD); + + resetBwdDataPD(bwdDataPD); + + resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out); + + resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + + printGradFormatFlow(); +} + +void MKLDNNConvLayer::updateInputData() { + cpuInVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); +} + +void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) { + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); + } +} + +void MKLDNNConvLayer::loadConvSettings(memory::dims& wgt, + memory::dims& bias, + memory::dims& stride, + memory::dims& dilation, + memory::dims& padL, + memory::dims& padR) { + wgt = (gp_ == 1) ? memory::dims{oc_, ic_, fh_, fw_} + : memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_}; + bias = memory::dims{oc_}; + stride = memory::dims{sh_, sw_}; + padL = memory::dims{ph_, pw_}; + padR = getPaddingR(); + // note: mkldnn dilation start from 0 + dilation = memory::dims{dh_ - 1, dw_ - 1}; +} + +void MKLDNNConvLayer::resetFwdPD( + std::shared_ptr& pd) { + // dims for conv + memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_}; + memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; + memory::dims wgtDims, biasDims, strides, dilations, padL, padR; + loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); + + prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring + : prop_kind::forward_training; + algorithm algo = algorithm::convolution_direct; + padding_kind padKind = padding_kind::zero; + conv_fwd::desc fwdDesc = + biases_ && biases_->getW() + ? conv_fwd::desc(pk, + algo, + MKLDNNMatrix::createMemoryDesc(inDims), + MKLDNNMatrix::createMemoryDesc(wgtDims), + MKLDNNMatrix::createMemoryDesc(biasDims), + MKLDNNMatrix::createMemoryDesc(outDims), + strides, + dilations, + padL, + padR, + padKind) + : conv_fwd::desc(pk, + algo, + MKLDNNMatrix::createMemoryDesc(inDims), + MKLDNNMatrix::createMemoryDesc(wgtDims), + MKLDNNMatrix::createMemoryDesc(outDims), + strides, + dilations, + padL, + padR, + padKind); + pd.reset(new conv_fwd::primitive_desc(fwdDesc, engine_)); +} + +void MKLDNNConvLayer::resetFwdBuffers( + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + CHECK(pd); + resetInValue(pd, in); + + resetWgtBiasValue(pd, wgt, bias); + + resetOutValue(pd, out); +} + +void MKLDNNConvLayer::resetFwdPipeline( + std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + + if (cvtInVal_) { + pipeline.push_back(*cvtInVal_); + } + + if (bias) { + fwd_.reset(new conv_fwd(*pd, *in, *wgt, *bias, *out)); + } else { + fwd_.reset(new conv_fwd(*pd, *in, *wgt, *out)); + } + pipeline.push_back(*fwd_); + + if (cvtOutVal_) { + pipeline.push_back(*cvtOutVal_); + } +} + +void MKLDNNConvLayer::resetInValue( + std::shared_ptr& pd, MKLDNNMatrixPtr& in) { + const MatrixPtr& inMat = inputLayers_[0]->getOutput().value; + in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc()); + + // create buffer and reorder if input value do not match + cpuInVal_ = nullptr; + cvtInVal_ = nullptr; + if (inputIsOnlyMKLDNN()) { + MKLDNNMatrixPtr dnnIn = std::dynamic_pointer_cast(inMat); + CHECK(dnnIn) << "Input should be MKLDNNMatrix"; + if (dnnIn->getPrimitiveDesc() != in->getPrimitiveDesc()) { + CHECK_EQ(dnnIn->getFormat(), format::nc); + CHECK(ih_ == 1 && iw_ == 1) << "when input is nc format"; + // create a new one with nchw format and same data + memory::dims inDims = memory::dims{bs_, ic_, 1, 1}; + dnnIn = MKLDNNMatrix::create(inMat, inDims, format::nchw, engine_); + CHECK(dnnIn->getPrimitiveDesc() == in->getPrimitiveDesc()); + } + in = dnnIn; + } else { + const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE); + memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_}; + cpuInVal_ = MKLDNNMatrix::create(cpuIn, inDims, format::nchw, engine_); + if (cpuInVal_->getPrimitiveDesc() != in->getPrimitiveDesc()) { + // create new mkldnn matrix + in = MKLDNNMatrix::create(nullptr, pd->src_primitive_desc()); + cvtInVal_ = MKLDNNMatrix::createReorder(cpuInVal_, in); + CHECK(cvtInVal_) << "should not be emptry"; + } else { + in = cpuInVal_; + } + } +} + +void MKLDNNConvLayer::resetWgtBiasValue( + std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { + wgt = MKLDNNMatrix::create(weight_->getW(), pd->weights_primitive_desc()); + VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat(); + + bias = nullptr; + if (biases_ && biases_->getW()) { + bias = MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc()); + } +} + +void MKLDNNConvLayer::resetOutValue( + std::shared_ptr& pd, MKLDNNMatrixPtr& out) { + out = MKLDNNMatrix::create(output_.value, pd->dst_primitive_desc()); + + // change original output value from cpu matrix to mkldnn matrix + output_.value = std::dynamic_pointer_cast(out); + + // create reorder if output value has cpu device and pd do not match + cpuOutVal_ = nullptr; + cpuOutVal_ = nullptr; + if (!outputIsOnlyMKLDNN()) { + const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; + memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; + cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); + if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { + cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); + CHECK(cvtOutVal_) << "should not be emptry"; + } else { + // CPU output share the same data of MKLDNN output + cpuOut->setData(out->getData()); + cpuOutVal_ = out; + } + } +} + +void MKLDNNConvLayer::resetBwdWgtPD( + std::shared_ptr& pd) { + memory::dims wgtDims, biasDims, strides, dilations, padL, padR; + loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); + + // create backward weight using input, output and weight value memory desc + CHECK(inVal_) << "Should have input value"; + CHECK(outVal_) << "Should have output value"; + CHECK(wgtVal_) << "Should have weight value"; + algorithm algo = algorithm::convolution_direct; + padding_kind padKind = padding_kind::zero; + auto bwdWgtDesc = biasVal_ != nullptr + ? conv_bwdWgt::desc(algo, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + biasVal_->getMemoryDesc(), + outVal_->getMemoryDesc(), + strides, + padL, + padR, + padKind) + : conv_bwdWgt::desc(algo, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + outVal_->getMemoryDesc(), + strides, + padL, + padR, + padKind); + pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); + CHECK(pd->src_primitive_desc() == inVal_->getPrimitiveDesc()) + << "primitive desc of in value should equal"; + CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc()) + << "primitive desc of out grad should equal the out value"; + CHECK(pd->diff_weights_primitive_desc() == wgtVal_->getPrimitiveDesc()) + << "primitive desc of weight grad should equal the weight value"; +} + +void MKLDNNConvLayer::resetBwdDataPD( + std::shared_ptr& pd) { + if (inputLayers_[0]->getOutput().grad == nullptr) { + return; + } + + memory::dims wgtDims, biasDims, strides, dilations, padL, padR; + loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); + CHECK(inVal_) << "Should have input value"; + CHECK(outVal_) << "Should have output value"; + // create backward data using input and output value memory desc + // but using weight memory desc with any format + auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct, + inVal_->getMemoryDesc(), + MKLDNNMatrix::createMemoryDesc(wgtDims), + outVal_->getMemoryDesc(), + strides, + padL, + padR, + padding_kind::zero); + pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_)); + CHECK(pd->diff_src_primitive_desc() == inVal_->getPrimitiveDesc()) + << "primitive desc of in grad should equal the in value"; + CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc()) + << "primitive desc of out grad should equal"; +} + +void MKLDNNConvLayer::resetBwdBuffers( + std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + CHECK(wgtPD); + resetOutGrad(wgtPD, out); + + resetWgtBiasGrad(wgtPD, wgt, bias); + + resetInGrad(dataPD, in); + + resetWgtValBwdData(dataPD, wgtValBwdData_); +} + +void MKLDNNConvLayer::resetBwdPipeline( + std::vector& pipeline, + std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + + if (cvtOutGrad_) { + pipeline.push_back(*cvtOutGrad_); + } + + // add bwdWgt handle + if (bias) { + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias)); + } else { + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt)); + } + pipeline.push_back(*bwdWgt_); + + if (dataPD == nullptr) { + return; + } + + if (cvtWgtVal_) { + pipeline.push_back(*cvtWgtVal_); + } + + // add bwdData handle + CHECK(wgtValBwdData_) << "Should have weight memory"; + bwdData_.reset(new conv_bwdData(*dataPD, *out, *wgtValBwdData_, *in)); + pipeline.push_back(*bwdData_); + + if (cvtInGrad_) { + pipeline.push_back(*cvtInGrad_); + } +} + +void MKLDNNConvLayer::resetOutGrad( + std::shared_ptr& wgtPD, MKLDNNMatrixPtr& out) { + const MatrixPtr& outMat = output_.grad; + out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc()); + CHECK(outVal_ != nullptr && + out->getPrimitiveDesc() == outVal_->getPrimitiveDesc()) + << "primitive desc of out grad and value should be equal"; + + // TODO(TJ): merge outgrad + // create reorder if has output grad does not match + cpuOutGrad_ = nullptr; + cvtOutGrad_ = nullptr; + if (!outputIsOnlyMKLDNN()) { + const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; + // same PrimitiveDesc with cpuInVal_ + CHECK(cpuOutVal_); + cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc()); + if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) { + outMat->setData(cpuOut->getData()); + out = cpuOutGrad_; + } else { + cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out); + CHECK(cvtOutGrad_); + } + } +} + +void MKLDNNConvLayer::resetWgtBiasGrad( + std::shared_ptr& wgtPD, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { + wgt = MKLDNNMatrix::create(weight_->getWGrad(), + wgtPD->diff_weights_primitive_desc()); + CHECK(nullptr != wgtVal_ && + wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc()) + << "primitive desc of weight grad and value should be equal"; + VLOG(MKLDNN_FMTS) << "weight grad format: " << wgt->getFormat(); + + if (biasVal_ == nullptr) { + return; + } + bias = MKLDNNMatrix::create(biases_->getWGrad(), + wgtPD->diff_bias_primitive_desc()); + CHECK(bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc()) + << "primitive desc of bias grad should equal the bias value"; +} + +void MKLDNNConvLayer::resetInGrad( + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in) { + if (dataPD == nullptr) { + return; + } + + // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done + in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad, + dataPD->diff_src_primitive_desc()); + CHECK(nullptr != inVal_ && + in->getPrimitiveDesc() == inVal_->getPrimitiveDesc()) + << "primitive desc of input grad and value should be equal"; + + // create reorder if has output grad does not match + cpuInGrad_ = nullptr; + cvtInGrad_ = nullptr; + if (!inputIsOnlyMKLDNN()) { + const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE); + // same PrimitiveDesc with cpuInVal_ + CHECK(cpuInVal_); + cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc()); + if (cpuInGrad_->getPrimitiveDesc() != in->getPrimitiveDesc()) { + const MatrixPtr& dnnIn = getInputGrad(0, MKLDNN_DEVICE); + in = MKLDNNMatrix::create(dnnIn, in->getPrimitiveDesc()); + cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_); + CHECK(cvtInGrad_); + } else { + in = cpuInGrad_; + } + } +} + +void MKLDNNConvLayer::resetWgtValBwdData( + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& wgt) { + if (dataPD == nullptr) { + return; + } + + // create new weight value for backward data, and create reorder if necessary + // since the primitive_desc would be different with wgtVal_ + CHECK(wgtVal_) << "should have weight value"; + if (dataPD->weights_primitive_desc() != wgtVal_->getPrimitiveDesc()) { + wgtValBwdData_ = + MKLDNNMatrix::create(nullptr, dataPD->weights_primitive_desc()); + cvtWgtVal_ = MKLDNNMatrix::createReorder(wgtVal_, wgtValBwdData_); + CHECK(cvtWgtVal_); + } else { + wgtValBwdData_ = wgtVal_; + } + VLOG(MKLDNN_FMTS) << "weight value format for backward data" + << wgtValBwdData_->getFormat(); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNConvLayer.h b/paddle/gserver/layers/MKLDNNConvLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..f84f2f737c47a1b8adc2b83360a0396ffbc6ae24 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNConvLayer.h @@ -0,0 +1,253 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "MKLDNNLayer.h" +#include "mkldnn.hpp" + +namespace paddle { +typedef mkldnn::convolution_forward conv_fwd; +typedef mkldnn::convolution_backward_weights conv_bwdWgt; +typedef mkldnn::convolution_backward_data conv_bwdData; + +/** + * @brief A subclass of MKLDNNLayer conv layer. + * + * The config file api is mkldnn_conv + */ +class MKLDNNConvLayer : public MKLDNNLayer { +protected: + // padding height and width + int ph_, pw_; + // stride height and width + int sh_, sw_; + // dilation height and width + int dh_, dw_; + // filter(kenerl) height and width + int fh_, fw_; + // group number + int gp_; + + // in resetBwdData, the format of wgtValBwdData_ is different with wgtVal_ + MKLDNNMatrixPtr wgtValBwdData_; + // convert handle from wgtVal_ to wgtValBwdData_ + std::shared_ptr cvtWgtVal_; + + // save forward primitive_desc, which can be used backward + std::shared_ptr fwdPD_; + + // MKLDNNMatrixPtr which should be created from CPU Device + MKLDNNMatrixPtr cpuInVal_; + MKLDNNMatrixPtr cpuInGrad_; + MKLDNNMatrixPtr cpuOutVal_; + MKLDNNMatrixPtr cpuOutGrad_; + // convert handle between CPU device and MKLDNN device + std::shared_ptr cvtInVal_; + std::shared_ptr cvtInGrad_; + std::shared_ptr cvtOutVal_; + std::shared_ptr cvtOutGrad_; + + // whether the weight has been init + bool hasInitedWgt_; + + // true by default, which impact the calculation of output image size. + // details can refer to mathUtil.h + bool caffeMode_; + + // weight and bias + std::unique_ptr weight_; + std::unique_ptr biases_; + +public: + explicit MKLDNNConvLayer(const LayerConfig& config) + : MKLDNNLayer(config), hasInitedWgt_(false), caffeMode_(true) {} + + ~MKLDNNConvLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + + void resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; + + void resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; + + void updateInputData() override; + + void updateWeights(const UpdateCallback& callback) override; + + void convertWeightsFromPaddle() override; + + void convertWeightsToPaddle() override; + + void printSizeInfo() override { + MKLDNNLayer::printSizeInfo(); + VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_ + << ": ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_ + << ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_; + } + + void printValueFormatFlow() override { + if (cpuInVal_) { + VLOG(MKLDNN_FMTS) << cpuInVal_->getFormat() << " >>>"; + } + MKLDNNLayer::printValueFormatFlow(); + if (cpuOutVal_) { + VLOG(MKLDNN_FMTS) << " >>> " << cpuOutVal_->getFormat(); + } + } + + void printGradFormatFlow() override { + if (cpuInGrad_) { + VLOG(MKLDNN_FMTS) << cpuInGrad_->getFormat() << " <<<"; + } + MKLDNNLayer::printGradFormatFlow(); + if (cpuOutGrad_) { + VLOG(MKLDNN_FMTS) << " <<< " << cpuOutGrad_->getFormat(); + } + } + +protected: + /** + * load the dims settings of this conv + */ + void loadConvSettings(mkldnn::memory::dims& wgt, + mkldnn::memory::dims& bias, + mkldnn::memory::dims& stride, + mkldnn::memory::dims& dilation, + mkldnn::memory::dims& padL, + mkldnn::memory::dims& padR); + + /** + * reset the forward primitive descriptor. + */ + void resetFwdPD(std::shared_ptr& pd); + /** + * reset the MKLDNNMatrix buffers used in forward. + */ + void resetFwdBuffers(std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + /** + * reset the forward pipeline. + */ + void resetFwdPipeline(std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + + /** + * reset MKLDNNMatrix of input value + */ + void resetInValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& in); + /** + * reset MKLDNNMatrix of weight and bias value + */ + void resetWgtBiasValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias); + /** + * reset MKLDNNMatrix of output value + */ + void resetOutValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& out); + + /** + * reset the backward weight primitive descriptor. + */ + void resetBwdWgtPD(std::shared_ptr& pd); + /** + * reset the backward data primitive descriptor. + */ + void resetBwdDataPD(std::shared_ptr& pd); + /** + * reset the MKLDNNMatrix buffers used in backward. + */ + void resetBwdBuffers(std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + /** + * reset the backward pipeline. + */ + void resetBwdPipeline(std::vector& pipeline, + std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + + /** + * reset MKLDNNMatrix of output grad + */ + void resetOutGrad(std::shared_ptr& wgtPD, + MKLDNNMatrixPtr& out); + /** + * reset MKLDNNMatrix of weight and bias grad + */ + void resetWgtBiasGrad(std::shared_ptr& wgtPD, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias); + /** + * reset MKLDNNMatrix of input grad + */ + void resetInGrad(std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in); + /** + * reset MKLDNNMatrix of weight value for backward data + * since the primitive_desc would be different with wgtVal_ + */ + void resetWgtValBwdData(std::shared_ptr& dataPD, + MKLDNNMatrixPtr& wgt); + + /** + * get padding_r according to + * https://github.com/01org/mkl-dnn/blob/master/tests/gtests/ + * test_convolution_forward_common.hpp + * @note: mkldnn dilation start from 0 while paddle start from 1 + */ + mkldnn::memory::dims getPaddingR() const { + mkldnn::memory::dims padR = {ph_, pw_}; + for (int i = 0; i < 2; ++i) { + if ((ih_ - ((fh_ - 1) * dh_ + 1) + ph_ + padR[0]) / sh_ + 1 != oh_) { + ++padR[0]; + } + if ((iw_ - ((fw_ - 1) * dw_ + 1) + pw_ + padR[1]) / sw_ + 1 != ow_) { + ++padR[1]; + } + } + return padR; + } +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index e1d2270df24331914f3a51acc90a518084b3ce4e..e70802881e3f22160a87b7a4babda07ffbcf9d6f 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -17,6 +17,7 @@ limitations under the License. */ #include #include "MKLDNNTester.h" #include "ModelConfig.pb.h" +#include "paddle/math/MathUtils.h" using namespace paddle; // NOLINT @@ -63,6 +64,83 @@ TEST(MKLDNNLayer, FcLayer) { testFcLayer({/*bs*/ 15, /*ic*/ 3, /*oc*/ 6, /*ih*/ 16, /*iw*/ 16}); } +struct testConvDesc { + int bs, gp; + int ic, ih, iw; + int oc, oh, ow; + int fh, fw; + int ph, pw; + int sh, sw; + int dh, dw; +}; + +void testConvLayer(const testConvDesc& pm) { + const std::string compareTypes[] = {"mkldnn_conv", "exconv"}; + TestConfig cfg; + cfg.layerConfig.set_type(compareTypes[0]); + cfg.layerConfig.set_num_filters(pm.oc); + cfg.layerConfig.set_size(pm.oc * pm.oh * pm.ow); + // cfg.layerConfig.set_partial_sum(1); // TODO: check it + cfg.layerConfig.set_shared_biases(true); + cfg.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), + /* size of weight= */ size_t(pm.oc * pm.ic * pm.fh * pm.fw / pm.gp)}); + LayerInputConfig* input = cfg.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_groups(pm.gp); + conv->set_img_size(pm.iw); + conv->set_img_size_y(pm.ih); + conv->set_output_x(pm.ow); + conv->set_output_y(pm.oh); + conv->set_filter_size(pm.fw); + conv->set_filter_size_y(pm.fh); + conv->set_channels(pm.ic); + conv->set_padding(pm.pw); + conv->set_padding_y(pm.ph); + conv->set_stride(pm.sw); + conv->set_stride_y(pm.sh); + conv->set_dilation(pm.dw); + conv->set_dilation_y(pm.dh); + conv->set_caffe_mode(true); + conv->set_filter_channels(conv->channels() / conv->groups()); + CHECK_EQ(conv->filter_channels() * pm.gp, conv->channels()) + << "it is indivisible"; + + int fh = (pm.fh - 1) * pm.dh + 1; + int fw = (pm.fw - 1) * pm.dw + 1; + int ow = outputSize(pm.iw, fw, pm.pw, pm.sw, true); + int oh = outputSize(pm.ih, fh, pm.ph, pm.sh, true); + CHECK_EQ(ow, pm.ow) << "output size check failed"; + CHECK_EQ(oh, pm.oh) << "output size check failed"; + + MKLDNNTester tester; + for (auto biasSize : {pm.oc, 0}) { + cfg.biasSize = biasSize; + TestConfig ref = cfg; + ref.layerConfig.set_type(compareTypes[1]); + for (auto bs : {pm.bs, 1}) { + tester.run(cfg, ref, bs, pm.ih, pm.iw); + } + } +} + +TEST(MKLDNNLayer, ConvLayer) { + /* bs, gp, ic, ih, iw, oc, oh, ow, fh, fw, ph, pw, sh, sw, dh, dw */ + testConvLayer({2, 1, 3, 32, 32, 16, 32, 32, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({2, 1, 8, 16, 16, 8, 16, 16, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({3, 1, 16, 32, 32, 3, 32, 32, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({8, 1, 16, 18, 18, 32, 18, 18, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({16, 1, 1, 42, 31, 32, 23, 11, 4, 5, 3, 2, 2, 3, 1, 1}); + testConvLayer({2, 1, 8, 16, 16, 8, 8, 8, 3, 3, 1, 1, 2, 2, 1, 1}); + testConvLayer({3, 1, 8, 13, 13, 8, 7, 7, 3, 3, 1, 1, 2, 2, 1, 1}); + // with groups + testConvLayer({2, 2, 4, 5, 5, 8, 5, 5, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({2, 3, 3, 5, 5, 3, 5, 5, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({4, 4, 16, 3, 3, 16, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1}); +} + // TODO(TJ): add branch test int main(int argc, char** argv) { diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index c4063e5069854242d9f93886b66580385557ca73..0778bb63b7b3bca9b3d2647ca43dad72d783950a 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -49,6 +49,27 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, return create(m, memory::primitive_desc(memory::desc(dims, dtype, fmt), eg)); } +std::shared_ptr MKLDNNMatrix::createReorder(const MKLDNNMatrixPtr& src, + const MKLDNNMatrixPtr& dst, + bool checkData) { + if (src == dst || src->getPrimitiveDesc() == dst->getPrimitiveDesc()) { + return nullptr; + } + + if (checkData && (src->getData() == dst->getData())) { + LOG(FATAL) << "can not create reorder with inplace data"; + return nullptr; + } + + memory::dims srcDims = src->getDims(); + memory::dims dstDims = dst->getDims(); + CHECK_EQ(srcDims.size(), dstDims.size()); + for (size_t i = 0; i < srcDims.size(); ++i) { + CHECK_EQ(srcDims[i], dstDims[i]); + } + return std::make_shared(*src, *dst); +} + void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, memory::format srcFmt, memory::dims targetDim) { diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index eef3b429e6fa0087aeac3f5aed9dff983b06e826..0aa130b4a0d458ad78d5d1330164af9e73b22a44 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -52,6 +52,31 @@ public: mkldnn::engine& eg, mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); + /** + * Create Memory descriptor. + * default with any format and f32 dtype + */ + static mkldnn::memory::desc createMemoryDesc( + const mkldnn::memory::dims& dims, + const mkldnn::memory::format& fmt = mkldnn::memory::format::any, + const mkldnn::memory::data_type& dtype = mkldnn::memory::data_type::f32) { + return mkldnn::memory::desc(dims, dtype, fmt); + } + + /** + * Create reorder primitive. + * Create a mkldnn::reorder handle for converting src MKLDNNMatrix to dst. + * checkData: for whether to check the data handle of src and dst is the same. + * if true, means check it and do not want support inplace reorder; + * otherwise do not check data which means the created reorder + * maybe inplace buffer and do not guarantee the logical is correct + * since not all format or conversion support inplace. + */ + static std::shared_ptr createReorder( + const MKLDNNMatrixPtr& src, + const MKLDNNMatrixPtr& dst, + bool checkData = true); + public: /** * Reorder this MKLDNNMatrix from other format. diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 4f68a8953446ffa0510df65c5b214d09b913cff8..a9e1d6d2e06d56f837690ec95fa8f8d41a90725f 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2055,20 +2055,26 @@ class ConvLayerBase(LayerBase): if num_filters is not None: self.config.num_filters = num_filters + use_mkldnn = int(g_command_config_args.get("use_mkldnn", 0)) use_gpu = int(g_command_config_args.get("use_gpu", 0)) parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - # Automatically select cudnn_type for GPU and exconv for CPU + # Automatically select cudnn_type for GPU, exconv for CPU + # and mkldnn_conv for MKLDNN # if set type=conv, but still reserve the way user specify - # exconv or cudnn_conv manually. + # exconv, mkldnn_conv or cudnn_conv manually. if self.layer_type == "cudnn_conv": config_assert(use_gpu, "cudnn_conv only support GPU") + if self.layer_type == "mkldnn_conv": + config_assert(use_mkldnn, "mkldnn_conv only support MKLDNN") + if (use_gpu == 1 and self.layer_type != "exconv" and + self.layer_type != "mkldnn_conv" and (parallel_nn == 0 or self.config.device > -1)): self.layer_type = "cudnn_conv" else: - self.layer_type = "exconv" + self.layer_type = "mkldnn_conv" if use_mkldnn else "exconv" # need to specify layer in config self.config.type = self.layer_type @@ -2100,6 +2106,11 @@ class ConvLayer(ConvLayerBase): layer_type = 'exconv' +@config_layer('mkldnn_conv') +class ConvLayer(ConvLayerBase): + layer_type = 'mkldnn_conv' + + @config_layer('cudnn_conv') class ConvLayer(ConvLayerBase): layer_type = 'cudnn_conv'