remove convertOutputToOtherDevice

d4c07348 · tensor-tang · d74fe780 · d4c07348 · d4c07348 · d4c07348
3 changed file
--- a/paddle/gserver/layers/MKLDNNFcLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp
@@ -77,24 +77,6 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
  wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim);
 }
-void MKLDNNFcLayer::convertOutputToOtherDevice() {
-  copyOutputInfoToOtherDevice();
-  // find other cpu device and reorder output to cpu device
-  int cnt = 0;
-  for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
-    if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
-      // fc cpu output value do not need convert
-      // just share point
-      outputOtherDevice_[i].value = output_.value;
-      ++cnt;
-    }
-  }
-  if (cnt > 1) {
-    LOG(WARNING) << "should not have more than one CPU devie";
-  }
-}
 void MKLDNNFcLayer::reshape() {
  const Argument& input = getInput(0, getPrev(0)->getDeviceId());
  int batchSize = input.getBatchSize();
@@ -155,7 +137,10 @@ void MKLDNNFcLayer::resetFwd() {
  // change original output value to mkldnn output value
  output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
  if (!outputIsOnlyMKLDNN()) {
-    convertOutputToOtherDevice();
+    copyOutputInfoToOtherDevice();
+    // fc cpu output value do not need create convert
+    // just share point
+    getOutput(CPU_DEVICE).value->setData(output_.value->getData());
  }
  // create forward handle

--- a/paddle/gserver/layers/MKLDNNFcLayer.h
+++ b/paddle/gserver/layers/MKLDNNFcLayer.h
@@ -72,8 +72,6 @@ protected:
   * only would be called when needed
   */
  void resetBwd();
-  void convertOutputToOtherDevice() override;
 };
 }  // namespace paddle
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -113,12 +113,6 @@ public:
   */
  virtual void convertWeightsToPaddle() {}
-  /**
-   * convert MKLDNN output to other device.
-   * only support CPU device yet
-   */
-  virtual void convertOutputToOtherDevice() {}
  /**
   * print info about sizes
   */
@@ -155,6 +149,7 @@ protected:
   *        copy base info and do not copy data value
   */
  void copyOutputInfoToOtherDevice() {
+    int cnt = 0;
    for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
      outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight());
      outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth());
@@ -163,6 +158,12 @@ protected:
      outputOtherDevice_[i].subSequenceStartPositions =
          output_.subSequenceStartPositions;
      outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
+      if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
+        ++cnt;
+      }
+    }
+    if (cnt > 1) {
+      LOG(WARNING) << "should not have more than one CPU devie";
    }
  }