refine MKLDNNLayer logical: move forward and backward to MKLDNNLayer and...

refine MKLDNNLayer logical: move forward and backward to MKLDNNLayer and remove copyOutputInfoToOtherDevice

refine MKLDNNLayer logical: move forward and backward to MKLDNNLayer and...
refine MKLDNNLayer logical: move forward and backward to MKLDNNLayer and remove copyOutputInfoToOtherDevice
94ea8ee0 · tensor-tang · f40d5f58 · 94ea8ee0 · 94ea8ee0 · 94ea8ee0
3 changed files
--- a/paddle/gserver/layers/MKLDNNFcLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp
@@ -14,7 +14,6 @@ limitations under the License. */
 #include "MKLDNNFcLayer.h"
 #include "paddle/utils/Logging.h"
-#include "paddle/utils/Stat.h"
 using namespace mkldnn;  // NOLINT
 typedef memory::format format;
@@ -40,6 +39,8 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap,
  oc_ = getSize();
  oh_ = 1;
  ow_ = 1;
+  ih_ = 1;
+  iw_ = 1;
  // input size can not change in FC
  iLayerSize_ = inputLayers_[0]->getSize();
@@ -78,36 +79,17 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
 }
 void MKLDNNFcLayer::reshape() {
-  const Argument& input = getInput(0, getPrev(0)->getDeviceId());
+  reshapeInput();
-  int batchSize = input.getBatchSize();
-  if (bs_ == batchSize) {
-    return;
-  }
-  bs_ = batchSize;
-  ih_ = input.getFrameHeight();
-  iw_ = input.getFrameWidth();
-  if (ih_ == 0) {
-    ih_ = 1;
-  }
-  if (iw_ == 0) {
-    iw_ = 1;
-  }
  CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize());
  ic_ = iLayerSize_ / (ih_ * iw_);
  CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible";
  CHECK_EQ(size_t(oc_), getSize());
-  printSizeInfo();
-  // reset output
+  reshapeOutput(oh_, ow_);
-  output_.setFrameHeight(oh_);
+  resizeOutput(bs_, oc_);
-  output_.setFrameWidth(ow_);
-  resetOutput(bs_, oc_);
-  // reset mkldnn forward
+  printSizeInfo();
-  resetFwd();
-  needResetBwd_ = true;
-  convertWeightsFromPaddle();
 }
 void MKLDNNFcLayer::resetFwd() {
@@ -137,7 +119,6 @@ void MKLDNNFcLayer::resetFwd() {
  // change original output value to mkldnn output value
  output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
  if (!outputIsOnlyMKLDNN()) {
-    copyOutputInfoToOtherDevice();
    // fc cpu output value do not need create convert
    // just share point
    getOutput(CPU_DEVICE).value->setData(output_.value->getData());
@@ -243,51 +224,13 @@ void MKLDNNFcLayer::resetBwd() {
 }
 void MKLDNNFcLayer::updateInputData() {
-  if (inputLayers_[0]->getType() != "data") {
+  inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
-    return;
-  }
-  real* iData = getInputValue(0, CPU_DEVICE)->getData();
-  inVal_->setData(iData);
-}
-void MKLDNNFcLayer::forward(PassType passType) {
-  Layer::forward(passType);
-  reshape();
-  {
-    REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
-    updateInputData();
-    // just submit forward pipeline
-    stream_->submit(pipelineFwd_);
-  }
-  /* activation */ {
-    REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
-    forwardActivation();
-  }
 }
-void MKLDNNFcLayer::backward(const UpdateCallback& callback) {
+void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) {
-  /* Do derivation */ {
+  weight_->getParameterPtr()->incUpdate(callback);
-    REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
+  if (biases_ && biases_->getWGrad()) {
-    backwardActivation();
+    biases_->getParameterPtr()->incUpdate(callback);
-  }
-  {
-    REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
-    resetBwd();
-    // just sumbmit backward pipeline
-    stream_->submit(pipelineBwd_);
-  }
-  {
-    REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
-    weight_->getParameterPtr()->incUpdate(callback);
-    if (biases_ && biases_->getWGrad()) {
-      biases_->getParameterPtr()->incUpdate(callback);
-    }
  }
 }
 }  // namespace paddle
--- a/paddle/gserver/layers/MKLDNNFcLayer.h
+++ b/paddle/gserver/layers/MKLDNNFcLayer.h
@@ -45,35 +45,19 @@ public:
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
-  void convertWeightsFromPaddle() override;
+  void reshape() override;
-  void convertWeightsToPaddle() override;
-  void forward(PassType passType) override;
+  void resetFwd() override;
-  void backward(const UpdateCallback& callback) override;
+  void resetBwd() override;
  void updateInputData() override;
-protected:
+  void updateWeights(const UpdateCallback& callback) override;
-  /**
-   * reshape the input image sizes
+  void convertWeightsFromPaddle() override;
-   * and reset output buffer size
-   * and reset mkldnn forward
+  void convertWeightsToPaddle() override;
-   */
-  void reshape();
-  /**
-   * reset the forward primitve and memory
-   * only would be called when input size changes
-   */
-  void resetFwd();
-  /**
-   * reset the backward primitve and memory for mkldnn fc
-   * only would be called when needed
-   */
-  void resetBwd();
 };
 }  // namespace paddle
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "MKLDNNBase.h"
 #include "mkldnn.hpp"
 #include "paddle/math/MKLDNNMatrix.h"
+#include "paddle/utils/Stat.h"
 DECLARE_bool(use_mkldnn);
@@ -33,6 +34,8 @@ typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr;
 */
 class MKLDNNLayer : public Layer {
 protected:
+  // input value element count
+  size_t inputElemenCnt_;
  // batch size
  int bs_;
  // input image channel, height and width
@@ -52,7 +55,7 @@ protected:
  std::vector<mkldnn::primitive> pipelineFwd_;
  std::vector<mkldnn::primitive> pipelineBwd_;
-  // MKLDNNMatrixPtr
+  // MKLDNNMatrixPtr with internal format
  MKLDNNMatrixPtr inVal_;
  MKLDNNMatrixPtr inGrad_;
  MKLDNNMatrixPtr outVal_;
@@ -65,6 +68,7 @@ protected:
 public:
  explicit MKLDNNLayer(const LayerConfig& config)
      : Layer(config),
+        inputElemenCnt_(0),
        bs_(0),
        ic_(0),
        ih_(0),
@@ -95,12 +99,93 @@ public:
    if (!Layer::init(layerMap, parameterMap)) {
      return false;
    }
+    checkCPUOutputsNumber();
    stream_.reset(new MKLDNNStream());
    engine_ = CPUEngine::Instance().getEngine();
    return true;
  }
+  void forward(PassType passType) override {  // common forward flow
+    passType_ = passType;
+    {
+      REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
+      copySeqInfoToOutputs();
+      CHECK(!inputLayers_.empty());
+      size_t elemenCnt = inputLayers_[0]->getOutput().value->getElementCnt();
+      if (inputElemenCnt_ != elemenCnt) {  // first input's element count changed
+        inputElemenCnt_ = elemenCnt;
+        reshape();
+        resetFwd();
+        convertWeightsFromPaddle();
+        needResetBwd_ = true;  // makes the next backward() call resetBwd()
+      }
+      if (inputLayers_[0]->getType() == "data") {
+        updateInputData();
+      }
+      stream_->submit(pipelineFwd_);  // submit the forward pipeline
+    }
+    /* activation, timed separately from the mkldnn forward */ {
+      REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
+      forwardActivation();
+    }
+  }
+  void backward(const UpdateCallback& callback) override {  // common backward flow
+    /* Do derivation of the activation first */ {
+      REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
+      backwardActivation();
+    }
+    {
+      REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
+      if (needResetBwd_) {  // set by forward() when the input element count changed
+        resetBwd();
+        needResetBwd_ = false;
+      }
+      stream_->submit(pipelineBwd_);  // submit the backward pipeline
+    }
+    {
+      REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
+      updateWeights(callback);  // no-op unless the derived layer overrides it
+    }
+  }
+  /**
+   * Reshape the input image sizes
+   * and reset the output image and buffer size.
+   * forward() calls this, before resetFwd(), whenever the element count of
+   * the first input's value differs from the cached inputElemenCnt_.
+   */
+  virtual void reshape() = 0;
+  /**
+   * Reset the mkldnn forward primitive and memory.
+   * Only called by forward() when the input element count changes,
+   * right after reshape().
+   */
+  virtual void resetFwd() = 0;
+  /**
+   * Reset the mkldnn backward primitive and memory.
+   * Only called by backward() when needResetBwd_ is set; forward() sets
+   * that flag after the input element count changes, and backward()
+   * clears it once this call returns.
+   */
+  virtual void resetBwd() = 0;
+  /**
+   * Update the input value data when the input layer is of "data" type,
+   * since the input value data address might have changed.
+   * forward() calls this on every pass, just before submitting the forward
+   * pipeline, but only if inputLayers_[0]->getType() == "data".
+   * The default implementation does nothing.
+   */
+  virtual void updateInputData() {}
+  /**
+   * Update weights and biases if necessary.
+   * backward() calls this with its UpdateCallback after the backward
+   * pipeline has been submitted.
+   * The default implementation does nothing.
+   */
+  virtual void updateWeights(const UpdateCallback& callback) {}
  /**
   * convert weight from paddle format to mkldnn format
   * weight_ will be override
@@ -114,10 +199,38 @@ public:
  virtual void convertWeightsToPaddle() {}
  /**
-   * Update input value data when input layer is "data" type.
+   * add this interface as public for unit test
-   * Since the input value data address might be changed.
   */
-  virtual void updateInputData() {}
+  void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); }
+protected:
+  /**
+   * Reshape the input image sizes and the input batch size.
+   * Reads the output Argument of the first input layer: bs_ is always
+   * refreshed from its batch size, while ih_ and iw_ are overwritten only
+   * when the reported frame height / width is non-zero, so a zero value
+   * leaves the previously stored size untouched.
+   */
+  virtual void reshapeInput() {
+    const Argument& input = inputLayers_[0]->getOutput();
+    bs_ = input.getBatchSize();
+    int height = input.getFrameHeight();
+    int width = input.getFrameWidth();
+    if (height != 0) {
+      ih_ = height;
+    }
+    if (width != 0) {
+      iw_ = width;
+    }
+  }
+  /**
+   * Reshape the output image sizes.
+   * Sets the given frame height and width on output_ and on every entry
+   * of outputOtherDevice_, so all output arguments report the same size.
+   * @param height frame height to store in each output argument
+   * @param width frame width to store in each output argument
+   */
+  virtual void reshapeOutput(size_t height, size_t width) {
+    output_.setFrameHeight(height);
+    output_.setFrameWidth(width);
+    for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
+      outputOtherDevice_[i].setFrameHeight(height);
+      outputOtherDevice_[i].setFrameWidth(width);
+    }
+  }
  /**
   * print info about sizes
@@ -133,8 +246,8 @@ public:
   */
  virtual void printValueFormatFlow() {
    if (inVal_ && outVal_) {
-      VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat()
+      VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>> "
-                        << " >>> " << outVal_->getFormat();
+                        << outVal_->getFormat();
    }
  }
@@ -143,36 +256,12 @@ public:
   */
  virtual void printGradFormatFlow() {
    if (inGrad_ && outGrad_) {
-      VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat()
+      VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<< "
-                        << " <<< " << outGrad_->getFormat();
+                        << outGrad_->getFormat();
    }
  }
 protected:
-  /**
-   * copy image size and sequence info to other device
-   * @note: can not directly use Layer::copyOutputToOtherDevice since here only
-   *        copy base info and do not copy data value
-   */
-  void copyOutputInfoToOtherDevice() {
-    int cnt = 0;
-    for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
-      outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight());
-      outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth());
-      outputOtherDevice_[i].sequenceStartPositions =
-          output_.sequenceStartPositions;
-      outputOtherDevice_[i].subSequenceStartPositions =
-          output_.subSequenceStartPositions;
-      outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
-      if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
-        ++cnt;
-      }
-    }
-    if (cnt > 1) {
-      LOG(WARNING) << "should not have more than one CPU devie";
-    }
-  }
  /**
   * If input only has MKLDNN device.
   * Otherwise, only support the previous layer using CPU device.
@@ -205,6 +294,7 @@ protected:
   */
  void setDevice(int id) { deviceId_ = id; }
+private:
  /**
   * Set deviceId of the params used in this layer.
   */
@@ -228,6 +318,42 @@ protected:
      parameter->setDevice(id);
    }
  }
+  /**
+   * Check the number of CPU devices in outputOtherDevice_.
+   * Counts the entries whose deviceId equals CPU_DEVICE and fails the
+   * CHECK_LE if that count exceeds max.
+   * @param max largest allowed number of CPU outputs (one at most by default)
+   */
+  void checkCPUOutputsNumber(int max = 1) {
+    int cnt = 0;
+    for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
+      if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
+        ++cnt;
+      }
+    }
+    CHECK_LE(cnt, max) << "too much CPU devies";
+  }
+  /**
+   * Copy the sequence info from the first input layer to output_ and to
+   * every entry of outputOtherDevice_.
+   * Returns without copying when there is no input layer or when
+   * needSequenceInfo_ is false.
+   * @note: do not use getInput(0) here since it uses this layer's
+   *        deviceId_; use "inputLayers_[0]->getOutput()" instead.
+   */
+  void copySeqInfoToOutputs() {
+    if (inputLayers_.empty() || !needSequenceInfo_) {
+      return;
+    }
+    const Argument& input = inputLayers_[0]->getOutput();
+    output_.sequenceStartPositions = input.sequenceStartPositions;
+    output_.subSequenceStartPositions = input.subSequenceStartPositions;
+    output_.cpuSequenceDims = input.cpuSequenceDims;
+    for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
+      outputOtherDevice_[i].sequenceStartPositions =
+          output_.sequenceStartPositions;
+      outputOtherDevice_[i].subSequenceStartPositions =
+          output_.subSequenceStartPositions;
+      outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
+    }
+  }
 };
 }  // namespace paddle