diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp
index d5621412caee843e24a0d0c9b7096402765738c7..2bc20eee6c452d0943dbf43b17ebe77976c97489 100644
--- a/paddle/gserver/layers/Layer.cpp
+++ b/paddle/gserver/layers/Layer.cpp
@@ -41,7 +41,7 @@ namespace paddle {
 Layer::Layer(const LayerConfig& config, bool useGpu)
     : config_(config),
       useGpu_(useGpu),
-      deviceId_(-1),
+      deviceId_(CPU_DEVICE),
       needSequenceInfo_(true) {}
 
 bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h
index 0ed482889d0cea884db3759620088575c5b10201..ec4d093e0cac9a766d17a36827affd2c08e1c618 100644
--- a/paddle/gserver/layers/Layer.h
+++ b/paddle/gserver/layers/Layer.h
@@ -59,7 +59,12 @@ protected:
   LayerConfig config_;
   /// whether to use GPU
   bool useGpu_;
-  /// Device Id. CPU is -1, and GPU is 0, 1, 2 ...
+  /// Paddle device ID, MKLDNN is -2, CPU is -1
+  enum PADDLE_DEVICE_ID {
+    MKLDNN_DEVICE = -2,
+    CPU_DEVICE = -1,
+  };
+  /// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ...
   int deviceId_;
   /// Input layers
   std::vector<LayerPtr> inputLayers_;
@@ -321,6 +326,19 @@ public:
     if (deviceId == getDeviceId()) {
       return output_;
     } else {
+      bool CPU2MKLDNN =
+          getDeviceId() == CPU_DEVICE && deviceId == MKLDNN_DEVICE;
+      bool MKLDNN2CPU =
+          getDeviceId() == MKLDNN_DEVICE && deviceId == CPU_DEVICE;
+      if (CPU2MKLDNN) {
+        // TODO(TJ): convert the CPU output to MKLDNN format before returning
+        return output_;
+      } else if (MKLDNN2CPU) {
+        // TODO(TJ): convert the MKLDNN output to CPU format before returning
+        return output_;
+      }
+
+      // TODO(TJ): handle the MKLDNN device, or add it to outputOtherDevice_
       for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
         if (outputOtherDevice_[i].deviceId == deviceId) {
           return outputOtherDevice_[i];
diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp
index d201fac65e0459050304195140e1aae081468f43..fac0390eee5014b8d2d20c8c1064e7e094218eef 100644
--- a/paddle/gserver/layers/MKLDNNFcLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp
@@ -135,33 +135,51 @@ void MKLDNNFcLayer::reshape() {
 
 void MKLDNNFcLayer::resetFwd() {
   bool hasBias = biases_ && biases_->getW();
-  real* iData = getInputValue(0)->getData();
-  real* oData = getOutputValue()->getData();
-  real* wData = weight_->getW()->getData();
-  real* bData = hasBias ? biases_->getW()->getData() : NULL;
+  const MatrixPtr& in = getInputValue(0);
+  const MatrixPtr& wgt = weight_->getW();
+  const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr;
+  const MatrixPtr& out = output_.value;
+
+  if (getPrev(0)->getDeviceId() == MKLDNN_DEVICE) {
+    inVal_ = std::dynamic_pointer_cast<MKLDNNMatrix>(in);
+    CHECK(inVal_) << "Input should be MKLDNNMatrix";
+    // TODO(TJ): change input nchw to nc if available
+    // inVal_->downSpatial()
+  } else {
+    inVal_ = MKLDNNMatrix::create(
+        in,
+        hasSpatial_ ? memory::dims{bs_, ic_, ih_, iw_} : memory::dims{bs_, ic_},
+        hasSpatial_ ? format::nchw : format::nc,
+        engine_);
+  }
 
-  // TODO(TJ): below create should be covered in MkldnnMatrix
-  // create memory desc
-  memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw)
-                                 : createMD({bs_, ic_}, format::nc);
-  memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw)
-                                 : createMD({oc_, ic_}, format::oi);
-  memory::desc bMD = bData != NULL ? createMD({oc_}, format::x)
-                                   : createMD({}, format::format_undef);
-  memory::desc oMD = createMD({bs_, oc_}, format::nc);
+  wgtVal_ = MKLDNNMatrix::create(
+      wgt,
+      hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_},
+      hasSpatial_ ? format::oihw : format::oi,
+      engine_);
 
-  // create memory primitive desc and memory self
-  inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
-  wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData));
-  outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData));
+  biasVal_ =
+      hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr;
+
+  outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_);
+
+  // change original output to mkldnn output
+  output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
 
+  // create forward handle
   prop_kind pk = prop_kind::forward;
-  fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD)
-                                       : fc_fwd::desc(pk, iMD, wMD, oMD);
+  fc_fwd::desc fwdDesc =
+      hasBias ? fc_fwd::desc(pk,
+                             inVal_->getMD(),
+                             wgtVal_->getMD(),
+                             biasVal_->getMD(),
+                             outVal_->getMD())
+              : fc_fwd::desc(
+                    pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD());
   fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
 
-  if (bData != NULL) {
-    biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData));
+  if (hasBias) {
     fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_));
   } else {
     fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_));
@@ -197,7 +215,8 @@ void MKLDNNFcLayer::resetBwd() {
     // update data
     inVal_->set_data_handle(iData);
   } else {
-    inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
+    LOG(FATAL) << "Should not be empty";
+    // inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
   }
 
   // create memory primitive desc and memory self
diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h
index 9533027fa6c756931941f93ae879dbb694f7dd57..b44095befb66a1b5b454b60d25d860e349e3abb9 100644
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -21,7 +21,6 @@ limitations under the License. */
 #include "paddle/math/MKLDNNMatrix.h"
 
 DECLARE_bool(use_mkldnn);
-DECLARE_bool(use_mkldnn_wgt);
 
 namespace paddle {
 
@@ -54,13 +53,14 @@ protected:
   std::vector<mkldnn::primitive> pipelineBwd_;
 
   // TODO(TJ): change below memory as MKLDNNMatrixPtr type
-  std::shared_ptr<mkldnn::memory> inVal_;
+  // MKLDNNMatrixPtr ;
+  MKLDNNMatrixPtr inVal_;
   std::shared_ptr<mkldnn::memory> inGrad_;
-  std::shared_ptr<mkldnn::memory> outVal_;
+  MKLDNNMatrixPtr outVal_;
   std::shared_ptr<mkldnn::memory> outGrad_;
-  std::shared_ptr<mkldnn::memory> wgtVal_;
+  MKLDNNMatrixPtr wgtVal_;
   std::shared_ptr<mkldnn::memory> wgtGrad_;
-  std::shared_ptr<mkldnn::memory> biasVal_;
+  MKLDNNMatrixPtr biasVal_;
   std::shared_ptr<mkldnn::memory> biasGrad_;
 
 public:
@@ -94,7 +94,7 @@ public:
     stream_.reset(new MKLDNNStream());
     engine_ = CPUEngine::Instance().getEngine();
 
-    // TODO(TJ): deivecId
+    setDeviceID(MKLDNN_DEVICE);
     return true;
   }
 
@@ -128,6 +128,19 @@ public:
     // TODO(TJ): isFmtSuppoted(fmt)
     return mkldnn::memory::desc(dims, type, fmt);
   }
+
+  void resetMKLDNNOutput(size_t height, size_t width) {
+    Layer::resetOutput(height, width);
+    // get value and grad, use mkldnn matrix instead
+    // output_.value;
+  }
+
+protected:
+  void setDeviceID(int id) {
+    deviceId_ = id;
+    output_.deviceId = id;
+    // TODO(TJ): handle the MKLDNN device, or add it to outputOtherDevice_
+  }
 };
 
 }  // namespace paddle
diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt
index ad6de18c81d60ba2023f708b81088ca38c9caa3b..8afe6b509d24afbad75ada2de78cd5626ebec354 100644
--- a/paddle/math/CMakeLists.txt
+++ b/paddle/math/CMakeLists.txt
@@ -15,13 +15,9 @@
 file(GLOB MATH_HEADERS . *.h)
 file(GLOB MATH_SOURCES . *.cpp)
-message(STATUS "----------MATH_HEADERS:${MATH_HEADERS}")
-message(STATUS "----------MATH_SOURCES:${MATH_SOURCES}")
 if(NOT WITH_MKLDNN)
   file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h")
   file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp")
-  message(STATUS "----------DNN_HEADER:${DNN_HEADER}")
-  message(STATUS "----------DNN_SOURCES:${DNN_SOURCES}")
   list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER})
   list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES})
   message(STATUS "Skip compiling with MKLDNNMatrix")
diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp
index df8e72d78bedd43608a138c2df9b5f7b23e0a438..44fc54278c993aa8b35d34d801f5684ddf397dc4 100644
--- a/paddle/math/MKLDNNMatrix.cpp
+++ b/paddle/math/MKLDNNMatrix.cpp
@@ -16,4 +16,31 @@ limitations under the License. */
 
 using namespace mkldnn;  // NOLINT
 
-namespace paddle {}  // namespace paddle
+namespace paddle {
+
+MKLDNNMatrixPtr MKLDNNMatrix::create(const MatrixPtr& m,
+                                     memory::dims dims,
+                                     memory::format fmt,
+                                     engine& eg,
+                                     mkldnn::memory::data_type dtype) {
+  CpuMatrixPtr cpuM = std::dynamic_pointer_cast<CpuMatrix>(m);
+  CHECK(cpuM) << "Only support create from CPU matrix yet";
+
+  size_t ndims = dims.size();
+  CHECK(ndims > 0) << "Input dims should not be empty";
+  size_t cnt = 1;
+  for (size_t i = 0; i < ndims; ++i) {
+    cnt *= dims[i];
+  }
+  CHECK_EQ(cnt, m->getElementCnt()) << "Count size does not match";
+
+  size_t width = m->getWidth();
+  size_t height = m->getHeight();
+  real* data = m->getData();
+
+  memory::desc md = memory::desc(dims, dtype, fmt);
+  memory::primitive_desc pd = memory::primitive_desc(md, eg);
+  return std::make_shared<MKLDNNMatrix>(data, height, width, pd);
+}
+
+}  // namespace paddle
diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h
index 91ef56f2c347614c75b13f831109785d7d94ee62..73eb50d2a0b3aceb577d0beb4ffb782b767f9029 100644
--- a/paddle/math/MKLDNNMatrix.h
+++ b/paddle/math/MKLDNNMatrix.h
@@ -14,9 +14,8 @@ limitations under the License. */
 
 #pragma once
 
-//#include "Matrix.h"
-#include "Vector.h"
-
+#include <vector>
+#include "Matrix.h"
 #include "mkldnn.hpp"
 #include "paddle/parameter/Parameter.h"
 
@@ -32,14 +31,42 @@ typedef std::shared_ptr<MKLDNNMatrix> MKLDNNMatrixPtr;
  * @brief MKLDNN Matrix.
 *
 */
-class MKLDNNMatrix : public CpuVector {
+class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
 public:
-  explicit MKLDNNMatrix(size_t size, int fmt) : CpuVector(size), fmt_(fmt) {}
+  MKLDNNMatrix(real* data,
+               size_t height,
+               size_t width,
+               mkldnn::memory::primitive_desc pd)
+      : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {}
 
-  ~MKLDNNMatrix() {}
+  MKLDNNMatrix(size_t height, size_t width, mkldnn::memory::primitive_desc pd)
+      : CpuMatrix(height, width, false), mkldnn::memory(pd) {
+    set_data_handle(CpuMatrix::getData());
+  }
+
+  static MKLDNNMatrixPtr create(
+      const MatrixPtr& m,
+      mkldnn::memory::dims dims,
+      mkldnn::memory::format fmt,
+      mkldnn::engine& eg,
+      mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32);
+
+  /**
+   * Get the memory primitive descriptor.
+   */
+  mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); }
 
-protected:
-  int fmt_;
+  /**
+   * Get the memory descriptor.
+   */
+  mkldnn::memory::desc getMD() { return getPD().desc(); }
+
+  /**
+   * Get the memory format.
+   */
+  int getFormat() { return getMD().data.format; }
+
+  ~MKLDNNMatrix() {}
 };
 
 }  // namespace paddle
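
For reference, a minimal usage sketch of the MKLDNNMatrix::create() API added by this patch. This is illustrative only, not part of the patch; the shapes, the engine setup, and the exampleCreate() wrapper are hypothetical:

#include "paddle/math/MKLDNNMatrix.h"

using namespace mkldnn;  // NOLINT

namespace paddle {

void exampleCreate() {
  const int bs = 128, oc = 32;  // hypothetical batch size and output channels
  // A CPU-backed matrix, like the fc output_.value handled in resetFwd().
  MatrixPtr out = Matrix::create(bs, oc, /*trans=*/false, /*useGpu=*/false);
  engine eng = engine(engine::kind::cpu, 0);

  // Wrap the existing buffer (no copy) as a {bs, oc} memory in "nc" layout;
  // create() CHECK-fails if the dims do not multiply up to getElementCnt().
  MKLDNNMatrixPtr outVal =
      MKLDNNMatrix::create(out, {bs, oc}, memory::format::nc, eng);

  // The result is both a CpuMatrix and an mkldnn::memory, so it can feed
  // primitives directly while staying readable through the Matrix interface.
  memory::desc md = outVal->getMD();  // descriptor used for fc_fwd::desc above
  (void)md;
}

}  // namespace paddle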
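
And a sketch of the buffer-sharing property the dual inheritance is built around: both base subobjects are constructed over the same pointer, so a write through the CpuMatrix view is visible through the mkldnn::memory data handle. Again the function name is hypothetical and the CHECK_EQ is only for illustration:

#include "paddle/math/MKLDNNMatrix.h"

using namespace mkldnn;  // NOLINT

namespace paddle {

void exampleSharedBuffer() {
  engine eng = engine(engine::kind::cpu, 0);
  MatrixPtr m = Matrix::create(2, 3, /*trans=*/false, /*useGpu=*/false);
  MKLDNNMatrixPtr mm = MKLDNNMatrix::create(m, {2, 3}, memory::format::nc, eng);

  mm->getData()[0] = 1.0f;  // write through the CpuMatrix view
  real* handle = static_cast<real*>(mm->get_data_handle());
  CHECK_EQ(handle[0], 1.0f);  // the mkldnn::memory view aliases the same data
}

}  // namespace paddle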