Commit 4bffbd30 authored by tensor-tang

use MKLDNNMatrix in fc forward

Parent 62e6dac4
@@ -41,7 +41,7 @@ namespace paddle {
 Layer::Layer(const LayerConfig& config, bool useGpu)
     : config_(config),
       useGpu_(useGpu),
-      deviceId_(-1),
+      deviceId_(CPU_DEVICE),
       needSequenceInfo_(true) {}

 bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
......
@@ -59,7 +59,12 @@ protected:
   LayerConfig config_;
   /// whether to use GPU
   bool useGpu_;
-  /// Device Id. CPU is -1, and GPU is 0, 1, 2 ...
+  /// Paddle device ID: MKLDNN is -2, CPU is -1
+  enum PADDLE_DEVICE_ID {
+    MKLDNN_DEVICE = -2,
+    CPU_DEVICE = -1,
+  };
+  /// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ...
   int deviceId_;
   /// Input layers
   std::vector<LayerPtr> inputLayers_;
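For illustration, a minimal standalone sketch of the device-ID convention introduced above; the `deviceName()` helper is hypothetical and not part of this commit:

```cpp
#include <string>

// Device-ID convention: MKLDNN is -2, CPU is -1, GPUs count up from 0.
enum PADDLE_DEVICE_ID {
  MKLDNN_DEVICE = -2,
  CPU_DEVICE = -1,
};

std::string deviceName(int deviceId) {
  if (deviceId == MKLDNN_DEVICE) return "mkldnn";
  if (deviceId == CPU_DEVICE) return "cpu";
  return "gpu" + std::to_string(deviceId);  // GPU ids start at 0
}
```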
@@ -321,6 +326,19 @@ public:
     if (deviceId == getDeviceId()) {
       return output_;
     } else {
+      bool CPU2MKLDNN =
+          getDeviceId() == CPU_DEVICE && deviceId == MKLDNN_DEVICE;
+      bool MKLDNN2CPU =
+          getDeviceId() == MKLDNN_DEVICE && deviceId == CPU_DEVICE;
+      if (CPU2MKLDNN) {
+        // TODO(TJ): convert the CPU output to MKLDNN format
+        return output_;
+      } else if (MKLDNN2CPU) {
+        // TODO(TJ): convert the MKLDNN output back to CPU format
+        return output_;
+      }
+      // TODO(TJ): handle mkldnn device or add mkldnn device to others
       for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
         if (outputOtherDevice_[i].deviceId == deviceId) {
           return outputOtherDevice_[i];
......
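The fallback loop above searches the per-device outputs linearly. A minimal sketch of that lookup in isolation; `Argument` here is a simplified stand-in for Paddle's real `Argument` class, and `findOutputForDevice()` is hypothetical:

```cpp
#include <cstddef>
#include <vector>

// Simplified stand-in: only the device id matters for this lookup.
struct Argument {
  int deviceId;
};

// Linear search over per-device outputs, as in getOutput() above;
// returns nullptr when no output was prepared for that device.
const Argument* findOutputForDevice(const std::vector<Argument>& others,
                                    int deviceId) {
  for (std::size_t i = 0; i < others.size(); ++i) {
    if (others[i].deviceId == deviceId) {
      return &others[i];
    }
  }
  return nullptr;
}
```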
@@ -135,33 +135,51 @@ void MKLDNNFcLayer::reshape() {

 void MKLDNNFcLayer::resetFwd() {
   bool hasBias = biases_ && biases_->getW();
-  real* iData = getInputValue(0)->getData();
-  real* oData = getOutputValue()->getData();
-  real* wData = weight_->getW()->getData();
-  real* bData = hasBias ? biases_->getW()->getData() : NULL;
+  const MatrixPtr& in = getInputValue(0);
+  const MatrixPtr& wgt = weight_->getW();
+  const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr;
+  const MatrixPtr& out = output_.value;

-  // TODO(TJ): below create should be covered in MkldnnMatrix
-  // create memory desc
-  memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw)
-                                 : createMD({bs_, ic_}, format::nc);
-  memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw)
-                                 : createMD({oc_, ic_}, format::oi);
-  memory::desc bMD = bData != NULL ? createMD({oc_}, format::x)
-                                   : createMD({}, format::format_undef);
-  memory::desc oMD = createMD({bs_, oc_}, format::nc);
+  if (getPrev(0)->getDeviceId() == MKLDNN_DEVICE) {
+    inVal_ = std::dynamic_pointer_cast<MKLDNNMatrix>(in);
+    CHECK(inVal_) << "Input should be MKLDNNMatrix";
+    // TODO(TJ): change input nchw to nc if available
+    // inVal_->downSpatial()
+  } else {
+    inVal_ = MKLDNNMatrix::create(
+        in,
+        hasSpatial_ ? memory::dims{bs_, ic_, ih_, iw_} : memory::dims{bs_, ic_},
+        hasSpatial_ ? format::nchw : format::nc,
+        engine_);
+  }

-  // create memory primitive desc and memory self
-  inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
-  wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData));
-  outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData));
+  wgtVal_ = MKLDNNMatrix::create(
+      wgt,
+      hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_},
+      hasSpatial_ ? format::oihw : format::oi,
+      engine_);
+
+  biasVal_ =
+      hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr;
+
+  outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_);
+
+  // change original output to mkldnn output
+  output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);

+  // create forward handle
   prop_kind pk = prop_kind::forward;
-  fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD)
-                                       : fc_fwd::desc(pk, iMD, wMD, oMD);
+  fc_fwd::desc fwdDesc =
+      hasBias ? fc_fwd::desc(pk,
+                             inVal_->getMD(),
+                             wgtVal_->getMD(),
+                             biasVal_->getMD(),
+                             outVal_->getMD())
+              : fc_fwd::desc(
+                    pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD());
   fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
-  if (bData != NULL) {
-    biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData));
+  if (hasBias) {
     fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_));
   } else {
     fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_));
......
@@ -197,7 +215,8 @@ void MKLDNNFcLayer::resetBwd() {
     // update data
     inVal_->set_data_handle(iData);
   } else {
-    inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
+    LOG(FATAL) << "Should not be empty";
+    // inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
   }

   // create memory primitive desc and memory self
......
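The rewritten `resetFwd()` picks dims and format based on whether the input still carries spatial dimensions. A minimal sketch of that selection, assuming the MKL-DNN 0.x C++ API this commit builds against; `makeInputDesc()` is a hypothetical free function, not part of the patch:

```cpp
#include "mkldnn.hpp"  // MKL-DNN 0.x C++ API

using mkldnn::memory;

// With spatial dims the input is described as 4-D nchw; otherwise it
// collapses to a 2-D nc layout, mirroring the bs_/ic_/ih_/iw_ choice above.
memory::desc makeInputDesc(bool hasSpatial, int bs, int ic, int ih, int iw) {
  memory::dims dims = hasSpatial ? memory::dims{bs, ic, ih, iw}
                                 : memory::dims{bs, ic};
  memory::format fmt = hasSpatial ? memory::format::nchw : memory::format::nc;
  return memory::desc(dims, memory::data_type::f32, fmt);
}
```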
@@ -21,7 +21,6 @@ limitations under the License. */
 #include "paddle/math/MKLDNNMatrix.h"

 DECLARE_bool(use_mkldnn);
-DECLARE_bool(use_mkldnn_wgt);

 namespace paddle {
@@ -54,13 +53,14 @@ protected:
   std::vector<mkldnn::primitive> pipelineBwd_;

   // TODO(TJ): change below memory as MKLDNNMatrixPtr type
-  std::shared_ptr<mkldnn::memory> inVal_;
+  MKLDNNMatrixPtr inVal_;
   std::shared_ptr<mkldnn::memory> inGrad_;
-  std::shared_ptr<mkldnn::memory> outVal_;
+  MKLDNNMatrixPtr outVal_;
   std::shared_ptr<mkldnn::memory> outGrad_;
-  std::shared_ptr<mkldnn::memory> wgtVal_;
+  MKLDNNMatrixPtr wgtVal_;
   std::shared_ptr<mkldnn::memory> wgtGrad_;
-  std::shared_ptr<mkldnn::memory> biasVal_;
+  MKLDNNMatrixPtr biasVal_;
   std::shared_ptr<mkldnn::memory> biasGrad_;

 public:
@@ -94,7 +94,7 @@ public:
     stream_.reset(new MKLDNNStream());
     engine_ = CPUEngine::Instance().getEngine();
-    // TODO(TJ): deivecId
+    setDeviceID(MKLDNN_DEVICE);
     return true;
   }
@@ -128,6 +128,19 @@ public:
     // TODO(TJ): isFmtSuppoted(fmt)
     return mkldnn::memory::desc(dims, type, fmt);
   }

+  void resetMKLDNNOutput(size_t height, size_t width) {
+    Layer::resetOutput(height, width);
+    // get value and grad, use mkldnn matrix instead
+    // output_.value;
+  }
+
+protected:
+  void setDeviceID(int id) {
+    deviceId_ = id;
+    output_.deviceId = id;
+    // TODO(TJ): handle mkldnn device or add mkldnn device to others
+  }
 };

 }  // namespace paddle
@@ -15,13 +15,9 @@
 file(GLOB MATH_HEADERS . *.h)
 file(GLOB MATH_SOURCES . *.cpp)
-message(STATUS "----------MATH_HEADERS:${MATH_HEADERS}")
-message(STATUS "----------MATH_SOURCES:${MATH_SOURCES}")

 if(NOT WITH_MKLDNN)
   file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h")
   file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp")
-  message(STATUS "----------DNN_HEADER:${DNN_HEADER}")
-  message(STATUS "----------DNN_SOURCES:${DNN_SOURCES}")
   list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER})
   list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES})
   message(STATUS "Skip compiling with MKLDNNMatrix")
......
@@ -16,4 +16,31 @@ limitations under the License. */

 using namespace mkldnn;  // NOLINT

-namespace paddle {}  // namespace paddle
+namespace paddle {
+
+MKLDNNMatrixPtr MKLDNNMatrix::create(const MatrixPtr& m,
+                                     memory::dims dims,
+                                     memory::format fmt,
+                                     engine& eg,
+                                     mkldnn::memory::data_type dtype) {
+  CpuMatrixPtr cpuM = std::dynamic_pointer_cast<CpuMatrix>(m);
+  CHECK(cpuM) << "Only support creating from CPU matrix yet";
+
+  size_t ndims = dims.size();
+  CHECK(ndims > 0) << "Input dims should not be empty";
+  size_t cnt = 1;
+  for (size_t i = 0; i < ndims; ++i) {
+    cnt *= dims[i];
+  }
+  CHECK_EQ(cnt, m->getElementCnt()) << "Count size does not match";
+
+  size_t width = m->getWidth();
+  size_t height = m->getHeight();
+  real* data = m->getData();
+
+  memory::desc md = memory::desc(dims, dtype, fmt);
+  memory::primitive_desc pd = memory::primitive_desc(md, eg);
+  return std::make_shared<MKLDNNMatrix>(data, height, width, pd);
+}
+
+}  // namespace paddle
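A hypothetical usage of the new factory, wrapping an existing CPU matrix as a 2-D nc-format MKL-DNN memory; the 128x64 shape and the engine setup are assumptions for illustration:

```cpp
// Wrap a CpuMatrix-backed MatrixPtr; dims must multiply to the element count.
MatrixPtr m = Matrix::create(/*height*/ 128, /*width*/ 64,
                             /*trans*/ false, /*useGpu*/ false);
mkldnn::engine eg(mkldnn::engine::cpu, 0);
MKLDNNMatrixPtr mat =
    MKLDNNMatrix::create(m, {128, 64}, mkldnn::memory::format::nc, eg);
CHECK_EQ(mat->getHeight(), m->getHeight());  // same underlying buffer
```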
@@ -14,9 +14,8 @@ limitations under the License. */

 #pragma once

-//#include "Matrix.h"
-#include "Vector.h"
+#include <vector>
+#include "Matrix.h"
 #include "mkldnn.hpp"
 #include "paddle/parameter/Parameter.h"
......
@@ -32,14 +31,42 @@ typedef std::shared_ptr<MKLDNNMatrix> MKLDNNMatrixPtr;
  * @brief MKLDNN Matrix.
  *
  */
-class MKLDNNMatrix : public CpuVector {
+class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
 public:
-  explicit MKLDNNMatrix(size_t size, int fmt) : CpuVector(size), fmt_(fmt) {}
+  MKLDNNMatrix(real* data,
+               size_t height,
+               size_t width,
+               mkldnn::memory::primitive_desc pd)
+      : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {}
+
+  MKLDNNMatrix(size_t height, size_t width, mkldnn::memory::primitive_desc pd)
+      : CpuMatrix(height, width, false), mkldnn::memory(pd) {
+    set_data_handle(CpuMatrix::getData());
+  }
+
+  static MKLDNNMatrixPtr create(
+      const MatrixPtr& m,
+      mkldnn::memory::dims dims,
+      mkldnn::memory::format fmt,
+      mkldnn::engine& eg,
+      mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32);
+
+  /**
+   * Get primitive descriptor
+   */
+  mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); }
+
+  /**
+   * Get memory descriptor
+   */
+  mkldnn::memory::desc getMD() { return getPD().desc(); }
+
+  /**
+   * Get format
+   */
+  int getFormat() { return getMD().data.format; }
+
+  ~MKLDNNMatrix() {}
 };

-protected:
-  int fmt_;
 }  // namespace paddle
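The class now inherits from both `CpuMatrix` and `mkldnn::memory`, so one object serves as a host-side matrix and an MKL-DNN primitive over the same buffer. A generic sketch of that design idea, with simplified stand-in types rather than the real Paddle/MKL-DNN classes:

```cpp
#include <cstddef>
#include <vector>

// Stand-in for the host-side matrix: owns a flat float buffer.
struct HostBuffer {
  explicit HostBuffer(std::size_t n) : data(n) {}
  std::vector<float> data;
};

// Stand-in for the DNN memory primitive: holds a raw data handle.
struct DnnMemory {
  void* handle = nullptr;
  void set_data_handle(void* h) { handle = h; }
};

// Dual inheritance: both base-class views alias one buffer, so the DNN
// engine and host code read and write the same memory without copies.
struct DualMatrix : public HostBuffer, public DnnMemory {
  explicit DualMatrix(std::size_t n) : HostBuffer(n) {
    set_data_handle(data.data());
  }
};
```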