/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #pragma once #include #include "Layer.h" #include "MKLDNNBase.h" #include "mkldnn.hpp" #include "paddle/math/MKLDNNMatrix.h" DECLARE_bool(use_mkldnn); namespace paddle { class MKLDNNLayer; typedef std::shared_ptr MKLDNNLayerPtr; /** * @brief Base class of MKLDNNlayer. * */ class MKLDNNLayer : public Layer { protected: // batch size int bs_; // input image channel, height and width int ic_, ih_, iw_; // output image channel, height and width int oc_, oh_, ow_; // backward also need reset after reset forward handle bool needResetBwd_; // mkldnn engine, stream and primivtives mkldnn::engine engine_; std::shared_ptr stream_; std::shared_ptr fwd_; std::shared_ptr bwdWgt_; std::shared_ptr bwdData_; std::vector pipelineFwd_; std::vector pipelineBwd_; // MKLDNNMatrixPtr MKLDNNMatrixPtr inVal_; MKLDNNMatrixPtr inGrad_; MKLDNNMatrixPtr outVal_; MKLDNNMatrixPtr outGrad_; MKLDNNMatrixPtr wgtVal_; MKLDNNMatrixPtr wgtGrad_; MKLDNNMatrixPtr biasVal_; MKLDNNMatrixPtr biasGrad_; public: explicit MKLDNNLayer(const LayerConfig& config) : Layer(config), bs_(0), ic_(0), ih_(0), iw_(0), oc_(0), oh_(0), ow_(0), needResetBwd_(true), engine_(mkldnn::engine::cpu, 0), stream_(nullptr), fwd_(nullptr), bwdWgt_(nullptr), bwdData_(nullptr) {} ~MKLDNNLayer() {} virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." << "Please set WITH_MKLDNN=ON " << "and set use_mkldnn=True"; CHECK(!useGpu_) << "Do not support GPU yet"; // set device id before Layer::init setDevice(MKLDNN_DEVICE); // change param device to MKLDNN device setParamsDevice(MKLDNN_DEVICE, parameterMap); if (!Layer::init(layerMap, parameterMap)) { return false; } stream_.reset(new MKLDNNStream()); engine_ = CPUEngine::Instance().getEngine(); return true; } /** * convert weight from paddle format to mkldnn format * weight_ will be override */ virtual void convertWeightsFromPaddle() {} /** * convert mkldnn weight to paddle format * weight_ will be override */ virtual void convertWeightsToPaddle() {} /** * print info about sizes */ virtual void printSizeInfo() { VLOG(MKLDNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_ << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_ << ", oh: " << oh_ << ", ow: " << ow_; } /** * Print the mkldnn memory format flow of value */ virtual void printValueFormatFlow() { if (inVal_ && outVal_) { VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat() << " >>> " << outVal_->getFormat(); } } /** * Print the mkldnn memory format flow of grad */ virtual void printGradFormatFlow() { if (inGrad_ && outGrad_) { VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat() << " <<< " << outGrad_->getFormat(); } } protected: /** * copy image size and sequence info to other device * @note: can not directly use Layer::copyOutputToOtherDevice since here only * copy base info and do not copy data value */ void copyOutputInfoToOtherDevice() { int cnt = 0; for (size_t i = 0; i < outputOtherDevice_.size(); i++) { outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight()); outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth()); outputOtherDevice_[i].sequenceStartPositions = output_.sequenceStartPositions; outputOtherDevice_[i].subSequenceStartPositions = output_.subSequenceStartPositions; outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { ++cnt; } } if (cnt > 1) { LOG(WARNING) << "should not have more than one CPU devie"; } } /** * If input only has MKLDNN device. * Otherwise, only support the previous layer using CPU device. */ bool inputIsOnlyMKLDNN(int index = 0) { int prevDevice = getPrev(index)->getDeviceId(); if (prevDevice == MKLDNN_DEVICE) { return true; } else { // do not support GPU yet CHECK_EQ(prevDevice, CPU_DEVICE) << "Only support CPU yet"; return false; } } /** * If output only has MKLDNN device. * Otherwise, other devices should only using CPU device. */ bool outputIsOnlyMKLDNN() { for (size_t i = 0; i < outputOtherDevice_.size(); i++) { CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) << "Only support other device is CPU yet"; } return outputOtherDevice_.size() == 0; } /** * Sync input value data */ void syncInputValue() { if (inputIsOnlyMKLDNN()) { return; } real* iData = getInputValue(0, CPU_DEVICE)->getData(); // update input data // since it might be changed if this is after data layer inVal_->setData(iData); } /** * Sync output grad data */ void syncOutputGrad() { if (outputIsOnlyMKLDNN()) { return; } // update diff real* oDiff = getOutput(CPU_DEVICE).grad->getData(); outGrad_->setData(oDiff); } /** * Set deviceId of this layer. */ void setDevice(int id) { deviceId_ = id; } /** * Set deviceId of the params used in this layer. */ void setParamsDevice(int id, const ParameterMap& parameterMap) { for (auto& inputConfig : config_.inputs()) { if (inputConfig.has_input_parameter_name()) { ParameterPtr parameter; std::string name = inputConfig.input_parameter_name(); CHECK(mapGet(name, parameterMap, ¶meter)) << "Cannot find input parameter " << name << " for layer " << getName(); parameter->setDevice(id); } } if (config_.has_bias_parameter_name()) { ParameterPtr parameter; std::string name = config_.bias_parameter_name(); CHECK(mapGet(name, parameterMap, ¶meter)) << "Cannot find bias parameter " << name << " for layer " << getName(); parameter->setDevice(id); } } }; } // namespace paddle