Commit 4bffbd30 authored by tensor-tang

use MKLDNNMatrix in fc forward

Parent 62e6dac4
@@ -41,7 +41,7 @@ namespace paddle {
 Layer::Layer(const LayerConfig& config, bool useGpu)
     : config_(config),
       useGpu_(useGpu),
-      deviceId_(-1),
+      deviceId_(CPU_DEVICE),
       needSequenceInfo_(true) {}

 bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
......
@@ -59,7 +59,12 @@ protected:
   LayerConfig config_;
   /// whether to use GPU
   bool useGpu_;
-  /// Device Id. CPU is -1, and GPU is 0, 1, 2 ...
+  /// Paddle device ID: MKLDNN is -2, CPU is -1
+  enum PADDLE_DEVICE_ID {
+    MKLDNN_DEVICE = -2,
+    CPU_DEVICE = -1,
+  };
+  /// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ...
   int deviceId_;
   /// Input layers
   std::vector<LayerPtr> inputLayers_;
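For illustration, a minimal standalone sketch of the device-ID convention introduced above; the `deviceName()` helper is hypothetical and not part of this commit:

```cpp
#include <string>

// Device-ID convention: MKLDNN is -2, CPU is -1, GPUs count up from 0.
enum PADDLE_DEVICE_ID {
  MKLDNN_DEVICE = -2,
  CPU_DEVICE = -1,
};

std::string deviceName(int deviceId) {
  if (deviceId == MKLDNN_DEVICE) return "mkldnn";
  if (deviceId == CPU_DEVICE) return "cpu";
  return "gpu" + std::to_string(deviceId);  // GPU ids start at 0
}
```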
@@ -321,6 +326,19 @@ public:
     if (deviceId == getDeviceId()) {
       return output_;
     } else {
+      bool CPU2MKLDNN =
+          getDeviceId() == CPU_DEVICE && deviceId == MKLDNN_DEVICE;
+      bool MKLDNN2CPU =
+          getDeviceId() == MKLDNN_DEVICE && deviceId == CPU_DEVICE;
+      if (CPU2MKLDNN) {
+        // TODO(TJ): convert the CPU output to MKLDNN format
+        return output_;
+      } else if (MKLDNN2CPU) {
+        // TODO(TJ): convert the MKLDNN output back to CPU format
+        return output_;
+      }
+      // TODO(TJ): handle mkldnn device or add mkldnn device to others
       for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
         if (outputOtherDevice_[i].deviceId == deviceId) {
           return outputOtherDevice_[i];
......
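The fallback loop above searches the per-device outputs linearly. A minimal sketch of that lookup in isolation; `Argument` here is a simplified stand-in for Paddle's real `Argument` class, and `findOutputForDevice()` is hypothetical:

```cpp
#include <cstddef>
#include <vector>

// Simplified stand-in: only the device id matters for this lookup.
struct Argument {
  int deviceId;
};

// Linear search over per-device outputs, as in getOutput() above;
// returns nullptr when no output was prepared for that device.
const Argument* findOutputForDevice(const std::vector<Argument>& others,
                                    int deviceId) {
  for (std::size_t i = 0; i < others.size(); ++i) {
    if (others[i].deviceId == deviceId) {
      return &others[i];
    }
  }
  return nullptr;
}
```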
@@ -135,33 +135,51 @@ void MKLDNNFcLayer::reshape() {

 void MKLDNNFcLayer::resetFwd() {
   bool hasBias = biases_ && biases_->getW();
-  real* iData = getInputValue(0)->getData();
-  real* oData = getOutputValue()->getData();
-  real* wData = weight_->getW()->getData();
-  real* bData = hasBias ? biases_->getW()->getData() : NULL;
+  const MatrixPtr& in = getInputValue(0);
+  const MatrixPtr& wgt = weight_->getW();
+  const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr;
+  const MatrixPtr& out = output_.value;

-  // TODO(TJ): below create should be covered in MkldnnMatrix
-  // create memory desc
-  memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw)
-                                 : createMD({bs_, ic_}, format::nc);
-  memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw)
-                                 : createMD({oc_, ic_}, format::oi);
-  memory::desc bMD = bData != NULL ? createMD({oc_}, format::x)
-                                   : createMD({}, format::format_undef);
-  memory::desc oMD = createMD({bs_, oc_}, format::nc);
+  if (getPrev(0)->getDeviceId() == MKLDNN_DEVICE) {
+    inVal_ = std::dynamic_pointer_cast<MKLDNNMatrix>(in);
+    CHECK(inVal_) << "Input should be MKLDNNMatrix";
+    // TODO(TJ): change input nchw to nc if available
+    // inVal_->downSpatial()
+  } else {
+    inVal_ = MKLDNNMatrix::create(
+        in,
+        hasSpatial_ ? memory::dims{bs_, ic_, ih_, iw_} : memory::dims{bs_, ic_},
+        hasSpatial_ ? format::nchw : format::nc,
+        engine_);
+  }

-  // create memory primitive desc and memory self
-  inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
-  wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData));
-  outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData));
+  wgtVal_ = MKLDNNMatrix::create(
+      wgt,
+      hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_},
+      hasSpatial_ ? format::oihw : format::oi,
+      engine_);
+
+  biasVal_ =
+      hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr;
+
+  outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_);
+
+  // change original output to mkldnn output
+  output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);

+  // create forward handle
   prop_kind pk = prop_kind::forward;
-  fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD)
-                                       : fc_fwd::desc(pk, iMD, wMD, oMD);
+  fc_fwd::desc fwdDesc =
+      hasBias ? fc_fwd::desc(pk,
+                             inVal_->getMD(),
+                             wgtVal_->getMD(),
+                             biasVal_->getMD(),
+                             outVal_->getMD())
+              : fc_fwd::desc(
+                    pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD());
   fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
-  if (bData != NULL) {
-    biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData));
+  if (hasBias) {
     fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_));
   } else {
     fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_));
......
@@ -197,7 +215,8 @@ void MKLDNNFcLayer::resetBwd() {
     // update data
     inVal_->set_data_handle(iData);
   } else {
-    inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
+    LOG(FATAL) << "Should not be empty";
+    // inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
   }

   // create memory primitive desc and memory self
......
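The rewritten `resetFwd()` picks dims and format based on whether the input still carries spatial dimensions. A minimal sketch of that selection, assuming the MKL-DNN 0.x C++ API this commit builds against; `makeInputDesc()` is a hypothetical free function, not part of the patch:

```cpp
#include "mkldnn.hpp"  // MKL-DNN 0.x C++ API

using mkldnn::memory;

// With spatial dims the input is described as 4-D nchw; otherwise it
// collapses to a 2-D nc layout, mirroring the bs_/ic_/ih_/iw_ choice above.
memory::desc makeInputDesc(bool hasSpatial, int bs, int ic, int ih, int iw) {
  memory::dims dims = hasSpatial ? memory::dims{bs, ic, ih, iw}
                                 : memory::dims{bs, ic};
  memory::format fmt = hasSpatial ? memory::format::nchw : memory::format::nc;
  return memory::desc(dims, memory::data_type::f32, fmt);
}
```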
@@ -21,7 +21,6 @@ limitations under the License. */
 #include "paddle/math/MKLDNNMatrix.h"

 DECLARE_bool(use_mkldnn);
-DECLARE_bool(use_mkldnn_wgt);

 namespace paddle {
@@ -54,13 +53,14 @@ protected:
   std::vector<mkldnn::primitive> pipelineBwd_;

   // TODO(TJ): change below memory as MKLDNNMatrixPtr type
-  std::shared_ptr<mkldnn::memory> inVal_;
+  MKLDNNMatrixPtr inVal_;
   std::shared_ptr<mkldnn::memory> inGrad_;
-  std::shared_ptr<mkldnn::memory> outVal_;
+  MKLDNNMatrixPtr outVal_;
   std::shared_ptr<mkldnn::memory> outGrad_;
-  std::shared_ptr<mkldnn::memory> wgtVal_;
+  MKLDNNMatrixPtr wgtVal_;
   std::shared_ptr<mkldnn::memory> wgtGrad_;
-  std::shared_ptr<mkldnn::memory> biasVal_;
+  MKLDNNMatrixPtr biasVal_;
   std::shared_ptr<mkldnn::memory> biasGrad_;

 public:
@@ -94,7 +94,7 @@ public:
     stream_.reset(new MKLDNNStream());
     engine_ = CPUEngine::Instance().getEngine();
-    // TODO(TJ): deivecId
+    setDeviceID(MKLDNN_DEVICE);
     return true;
   }
@@ -128,6 +128,19 @@ public:
     // TODO(TJ): isFmtSuppoted(fmt)
     return mkldnn::memory::desc(dims, type, fmt);
   }

+  void resetMKLDNNOutput(size_t height, size_t width) {
+    Layer::resetOutput(height, width);
+    // get value and grad, use mkldnn matrix instead
+    // output_.value;
+  }
+
+protected:
+  void setDeviceID(int id) {
+    deviceId_ = id;
+    output_.deviceId = id;
+    // TODO(TJ): handle mkldnn device or add mkldnn device to others
+  }
 };

 }  // namespace paddle
@@ -15,13 +15,9 @@
 file(GLOB MATH_HEADERS . *.h)
 file(GLOB MATH_SOURCES . *.cpp)
-message(STATUS "----------MATH_HEADERS:${MATH_HEADERS}")
-message(STATUS "----------MATH_SOURCES:${MATH_SOURCES}")

 if(NOT WITH_MKLDNN)
   file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h")
   file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp")
-  message(STATUS "----------DNN_HEADER:${DNN_HEADER}")
-  message(STATUS "----------DNN_SOURCES:${DNN_SOURCES}")
   list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER})
   list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES})
   message(STATUS "Skip compiling with MKLDNNMatrix")
......
@@ -16,4 +16,31 @@ limitations under the License. */

 using namespace mkldnn;  // NOLINT

-namespace paddle {}  // namespace paddle
+namespace paddle {
+
+MKLDNNMatrixPtr MKLDNNMatrix::create(const MatrixPtr& m,
+                                     memory::dims dims,
+                                     memory::format fmt,
+                                     engine& eg,
+                                     mkldnn::memory::data_type dtype) {
+  CpuMatrixPtr cpuM = std::dynamic_pointer_cast<CpuMatrix>(m);
+  CHECK(cpuM) << "Only support creating from CPU matrix yet";
+
+  size_t ndims = dims.size();
+  CHECK(ndims > 0) << "Input dims should not be empty";
+  size_t cnt = 1;
+  for (size_t i = 0; i < ndims; ++i) {
+    cnt *= dims[i];
+  }
+  CHECK_EQ(cnt, m->getElementCnt()) << "Count size does not match";
+
+  size_t width = m->getWidth();
+  size_t height = m->getHeight();
+  real* data = m->getData();
+
+  memory::desc md = memory::desc(dims, dtype, fmt);
+  memory::primitive_desc pd = memory::primitive_desc(md, eg);
+  return std::make_shared<MKLDNNMatrix>(data, height, width, pd);
+}
+
+}  // namespace paddle
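A hypothetical usage of the new factory, wrapping an existing CPU matrix as a 2-D nc-format MKL-DNN memory; the 128x64 shape and the engine setup are assumptions for illustration:

```cpp
// Wrap a CpuMatrix-backed MatrixPtr; dims must multiply to the element count.
MatrixPtr m = Matrix::create(/*height*/ 128, /*width*/ 64,
                             /*trans*/ false, /*useGpu*/ false);
mkldnn::engine eg(mkldnn::engine::cpu, 0);
MKLDNNMatrixPtr mat =
    MKLDNNMatrix::create(m, {128, 64}, mkldnn::memory::format::nc, eg);
CHECK_EQ(mat->getHeight(), m->getHeight());  // same underlying buffer
```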
@@ -14,9 +14,8 @@ limitations under the License. */

 #pragma once

-//#include "Matrix.h"
-#include "Vector.h"
+#include <vector>
+#include "Matrix.h"
 #include "mkldnn.hpp"
 #include "paddle/parameter/Parameter.h"
......
@@ -32,14 +31,42 @@ typedef std::shared_ptr<MKLDNNMatrix> MKLDNNMatrixPtr;
  * @brief MKLDNN Matrix.
  *
  */
-class MKLDNNMatrix : public CpuVector {
+class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
 public:
-  explicit MKLDNNMatrix(size_t size, int fmt) : CpuVector(size), fmt_(fmt) {}
+  MKLDNNMatrix(real* data,
+               size_t height,
+               size_t width,
+               mkldnn::memory::primitive_desc pd)
+      : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {}
+
+  MKLDNNMatrix(size_t height, size_t width, mkldnn::memory::primitive_desc pd)
+      : CpuMatrix(height, width, false), mkldnn::memory(pd) {
+    set_data_handle(CpuMatrix::getData());
+  }
+
+  static MKLDNNMatrixPtr create(
+      const MatrixPtr& m,
+      mkldnn::memory::dims dims,
+      mkldnn::memory::format fmt,
+      mkldnn::engine& eg,
+      mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32);
+
+  /**
+   * Get primitive descriptor
+   */
+  mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); }
+
+  /**
+   * Get memory descriptor
+   */
+  mkldnn::memory::desc getMD() { return getPD().desc(); }
+
+  /**
+   * Get format
+   */
+  int getFormat() { return getMD().data.format; }
+
+  ~MKLDNNMatrix() {}
 };

-protected:
-  int fmt_;
 }  // namespace paddle
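The class now inherits from both `CpuMatrix` and `mkldnn::memory`, so one object serves as a host-side matrix and an MKL-DNN primitive over the same buffer. A generic sketch of that design idea, with simplified stand-in types rather than the real Paddle/MKL-DNN classes:

```cpp
#include <cstddef>
#include <vector>

// Stand-in for the host-side matrix: owns a flat float buffer.
struct HostBuffer {
  explicit HostBuffer(std::size_t n) : data(n) {}
  std::vector<float> data;
};

// Stand-in for the DNN memory primitive: holds a raw data handle.
struct DnnMemory {
  void* handle = nullptr;
  void set_data_handle(void* h) { handle = h; }
};

// Dual inheritance: both base-class views alias one buffer, so the DNN
// engine and host code read and write the same memory without copies.
struct DualMatrix : public HostBuffer, public DnnMemory {
  explicit DualMatrix(std::size_t n) : HostBuffer(n) {
    set_data_handle(data.data());
  }
};
```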