From 4bffbd30f0dbc2a2bbff4aa8108867fceecc260a Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 21 Aug 2017 16:44:30 +0800 Subject: [PATCH] use MKLDNNMatrix in fc forward --- paddle/gserver/layers/Layer.cpp | 2 +- paddle/gserver/layers/Layer.h | 20 +++++++- paddle/gserver/layers/MKLDNNFcLayer.cpp | 63 ++++++++++++++++--------- paddle/gserver/layers/MKLDNNLayer.h | 25 +++++++--- paddle/math/CMakeLists.txt | 4 -- paddle/math/MKLDNNMatrix.cpp | 29 +++++++++++- paddle/math/MKLDNNMatrix.h | 43 +++++++++++++---- 7 files changed, 143 insertions(+), 43 deletions(-) diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index d5621412ca..2bc20eee6c 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -41,7 +41,7 @@ namespace paddle { Layer::Layer(const LayerConfig& config, bool useGpu) : config_(config), useGpu_(useGpu), - deviceId_(-1), + deviceId_(CPU_DEVICE), needSequenceInfo_(true) {} bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h index 0ed482889d..ec4d093e0c 100644 --- a/paddle/gserver/layers/Layer.h +++ b/paddle/gserver/layers/Layer.h @@ -59,7 +59,12 @@ protected: LayerConfig config_; /// whether to use GPU bool useGpu_; - /// Device Id. CPU is -1, and GPU is 0, 1, 2 ... + /// Paddle device ID, MKLDNN is -2, CPU is -1 + enum PADDLE_DEVICE_ID { + MKLDNN_DEVICE = -2, + CPU_DEVICE = -1, + }; + /// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ... 
int deviceId_; /// Input layers std::vector inputLayers_; @@ -321,6 +326,19 @@ public: if (deviceId == getDeviceId()) { return output_; } else { + bool CPU2MKLDNN = + getDeviceId() == CPU_DEVICE && deviceId == MKLDNN_DEVICE; + bool MKLDNN2CPU = + getDeviceId() == MKLDNN_DEVICE && deviceId == CPU_DEVICE; + if (CPU2MKLDNN) { + // TODO: do something + return output_; + } else if (MKLDNN2CPU) { + // TODO: do something + return output_; + } + + // TODO: handle mkldnn device or add mkldnn device to other for (size_t i = 0; i < outputOtherDevice_.size(); i++) { if (outputOtherDevice_[i].deviceId == deviceId) { return outputOtherDevice_[i]; diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index d201fac65e..fac0390eee 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -135,33 +135,51 @@ void MKLDNNFcLayer::reshape() { void MKLDNNFcLayer::resetFwd() { bool hasBias = biases_ && biases_->getW(); - real* iData = getInputValue(0)->getData(); - real* oData = getOutputValue()->getData(); - real* wData = weight_->getW()->getData(); - real* bData = hasBias ? biases_->getW()->getData() : NULL; + const MatrixPtr& in = getInputValue(0); + const MatrixPtr& wgt = weight_->getW(); + const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; + const MatrixPtr& out = output_.value; + + if (getPrev(0)->getDeviceId() == MKLDNN_DEVICE) { + inVal_ = std::dynamic_pointer_cast(in); + CHECK(inVal_) << "Input should be MKLDNNMatrix"; + // TODO: change input nchw to nc if available + // inVal_->downSpatial() + } else { + inVal_ = MKLDNNMatrix::create( + in, + hasSpatial_ ? memory::dims{bs_, ic_, ih_, iw_} : memory::dims{bs_, ic_}, + hasSpatial_ ? format::nchw : format::nc, + engine_); + } - // TODO(TJ): below create should be covered in MkldnnMatrix - // create memory desc - memory::desc iMD = hasSpatial_ ? 
createMD({bs_, ic_, ih_, iw_}, format::nchw) - : createMD({bs_, ic_}, format::nc); - memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) - : createMD({oc_, ic_}, format::oi); - memory::desc bMD = bData != NULL ? createMD({oc_}, format::x) - : createMD({}, format::format_undef); - memory::desc oMD = createMD({bs_, oc_}, format::nc); + wgtVal_ = MKLDNNMatrix::create( + wgt, + hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_}, + hasSpatial_ ? format::oihw : format::oi, + engine_); - // create memory primitive desc and memory self - inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); - wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData)); - outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData)); + biasVal_ = + hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; + + outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); + + // change original output to mkldnn output + output_.value = std::dynamic_pointer_cast(outVal_); + // create forward handle prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD) - : fc_fwd::desc(pk, iMD, wMD, oMD); + fc_fwd::desc fwdDesc = + hasBias ? 
fc_fwd::desc(pk, + inVal_->getMD(), + wgtVal_->getMD(), + biasVal_->getMD(), + outVal_->getMD()) + : fc_fwd::desc( + pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - if (bData != NULL) { - biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData)); + if (hasBias) { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); } else { fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); @@ -197,7 +215,8 @@ void MKLDNNFcLayer::resetBwd() { // update data inVal_->set_data_handle(iData); } else { - inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + LOG(FATAL) << "Should not be empty"; + // inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); } // create memory primitive desc and memory self diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 9533027fa6..b44095befb 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -21,7 +21,6 @@ limitations under the License. 
*/ #include "paddle/math/MKLDNNMatrix.h" DECLARE_bool(use_mkldnn); -DECLARE_bool(use_mkldnn_wgt); namespace paddle { @@ -54,13 +53,14 @@ protected: std::vector pipelineBwd_; // TODO(TJ): change below memory as MKLDNNMatrixPtr type - std::shared_ptr inVal_; + // MKLDNNMatrixPtr ; + MKLDNNMatrixPtr inVal_; std::shared_ptr inGrad_; - std::shared_ptr outVal_; + MKLDNNMatrixPtr outVal_; std::shared_ptr outGrad_; - std::shared_ptr wgtVal_; + MKLDNNMatrixPtr wgtVal_; std::shared_ptr wgtGrad_; - std::shared_ptr biasVal_; + MKLDNNMatrixPtr biasVal_; std::shared_ptr biasGrad_; public: @@ -94,7 +94,7 @@ public: stream_.reset(new MKLDNNStream()); engine_ = CPUEngine::Instance().getEngine(); - // TODO(TJ): deivecId + setDeviceID(MKLDNN_DEVICE); return true; } @@ -128,6 +128,19 @@ public: // TODO(TJ): isFmtSuppoted(fmt) return mkldnn::memory::desc(dims, type, fmt); } + + void resetMKLDNNOutput(size_t height, size_t width) { + Layer::resetOutput(height, width); + // get value and grad, use mkldnn matrix instead + // output_.value; + } + +protected: + void setDeviceID(int id) { + deviceId_ = id; + output_.deviceId = id; + // TODO: handle mkldnn device or add mkldnn device to other + } }; } // namespace paddle diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index ad6de18c81..8afe6b509d 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -15,13 +15,9 @@ file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES .
*.cpp) -message(STATUS "----------MATH_HEADERS:${MATH_HEADERS}") -message(STATUS "----------MATH_SOURCES:${MATH_SOURCES}") if(NOT WITH_MKLDNN) file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") - message(STATUS "----------DNN_HEADER:${DNN_HEADER}") - message(STATUS "----------DNN_SOURCES:${DNN_SOURCES}") list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER}) list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES}) message(STATUS "Skip compiling with MKLDNNMatrix") diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index df8e72d78b..44fc54278c 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -16,4 +16,31 @@ limitations under the License. */ using namespace mkldnn; // NOLINT -namespace paddle {} // namespace paddle +namespace paddle { + +MKLDNNMatrixPtr MKLDNNMatrix::create(const MatrixPtr& m, + memory::dims dims, + memory::format fmt, + engine& eg, + mkldnn::memory::data_type dtype) { + CpuMatrixPtr cpuM = std::dynamic_pointer_cast(m); + CHECK(cpuM) << "Only support create from CPU matrix yet"; + + size_t ndims = dims.size(); + CHECK(ndims > 0) << "Input dims should not be empty"; + size_t cnt = 1; + for (size_t i = 0; i < ndims; ++i) { + cnt *= dims[i]; + } + CHECK_EQ(cnt, m->getElementCnt()) << "Count size does not match"; + + size_t width = m->getWidth(); + size_t height = m->getHeight(); + real* data = m->getData(); + + memory::desc md = memory::desc(dims, dtype, fmt); + memory::primitive_desc pd = memory::primitive_desc(md, eg); + return std::make_shared(data, height, width, pd); +} + +} // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 91ef56f2c3..73eb50d2a0 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -14,9 +14,8 @@ limitations under the License. 
*/ #pragma once -//#include "Matrix.h" -#include "Vector.h" - +#include +#include "Matrix.h" #include "mkldnn.hpp" #include "paddle/parameter/Parameter.h" @@ -32,14 +31,42 @@ typedef std::shared_ptr MKLDNNMatrixPtr; * @brief MKLDNN Matrix. * */ -class MKLDNNMatrix : public CpuVector { +class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory { public: - explicit MKLDNNMatrix(size_t size, int fmt) : CpuVector(size), fmt_(fmt) {} + MKLDNNMatrix(real* data, + size_t height, + size_t width, + mkldnn::memory::primitive_desc pd) + : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {} - ~MKLDNNMatrix() {} + MKLDNNMatrix(size_t height, size_t width, mkldnn::memory::primitive_desc pd) + : CpuMatrix(height, width, false), mkldnn::memory(pd) { + set_data_handle(CpuMatrix::getData()); + } + + static MKLDNNMatrixPtr create( + const MatrixPtr& m, + mkldnn::memory::dims dims, + mkldnn::memory::format fmt, + mkldnn::engine& eg, + mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); + + /** + * Get primitive descriptor + */ + mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); } -protected: - int fmt_; + /** + * Get memory descriptor + */ + mkldnn::memory::desc getMD() { return getPD().desc(); } + + /** + * Get format + */ + int getFormat() { return getMD().data.format; } + + ~MKLDNNMatrix() {} }; } // namespace paddle -- GitLab