Commit 4bffbd30 authored by T tensor-tang

use MKLDNNMatrix in fc forward

Parent 62e6dac4
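Context: this commit replaces the raw mkldnn::memory handles in the FC forward pass with MKLDNNMatrix, which wraps an existing CPU buffer so the MKL-DNN fully-connected primitive can read and write layer data in place. Below is a minimal standalone sketch of that pattern under the MKLDNN 0.x C++ API, assuming fc_fwd is Paddle's typedef for mkldnn::inner_product_forward; the names are illustrative, not the exact patched code.

#include <vector>
#include "mkldnn.hpp"
using namespace mkldnn;  // NOLINT

// Run the FC forward pass on buffers owned by the framework, without copying.
void fcForwardSketch(float* inData, float* wgtData, float* outData,
                     int bs, int ic, int oc, engine& eng) {
  // Describe the layouts: batch x channels for data, oc x ic for weights.
  memory::desc iMD({bs, ic}, memory::data_type::f32, memory::format::nc);
  memory::desc wMD({oc, ic}, memory::data_type::f32, memory::format::oi);
  memory::desc oMD({bs, oc}, memory::data_type::f32, memory::format::nc);
  // Bind the descriptors to the existing buffers (no allocation).
  memory in(memory::primitive_desc(iMD, eng), inData);
  memory wgt(memory::primitive_desc(wMD, eng), wgtData);
  memory out(memory::primitive_desc(oMD, eng), outData);
  // Build and run the forward fully-connected (inner product) primitive.
  auto fwdDesc = inner_product_forward::desc(prop_kind::forward, iMD, wMD, oMD);
  auto fwdPD = inner_product_forward::primitive_desc(fwdDesc, eng);
  std::vector<primitive> pipeline{inner_product_forward(fwdPD, in, wgt, out)};
  stream(stream::kind::eager).submit(pipeline).wait();
}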
......@@ -41,7 +41,7 @@ namespace paddle {
Layer::Layer(const LayerConfig& config, bool useGpu)
: config_(config),
useGpu_(useGpu),
deviceId_(-1),
deviceId_(CPU_DEVICE),
needSequenceInfo_(true) {}
bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
......
......@@ -59,7 +59,12 @@ protected:
LayerConfig config_;
/// whether to use GPU
bool useGpu_;
/// Device Id. CPU is -1, and GPU is 0, 1, 2 ...
/// Paddle device ID, MKLDNN is -2, CPU is -1
enum PADDLE_DEVICE_ID {
MKLDNN_DEVICE = -2,
CPU_DEVICE = -1,
};
/// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ...
int deviceId_;
/// Input layers
std::vector<LayerPtr> inputLayers_;
......@@ -321,6 +326,19 @@ public:
if (deviceId == getDeviceId()) {
return output_;
} else {
bool CPU2MKLDNN =
getDeviceId() == CPU_DEVICE && deviceId == MKLDNN_DEVICE;
bool MKLDNN2CPU =
getDeviceId() == MKLDNN_DEVICE && deviceId == CPU_DEVICE;
if (CPU2MKLDNN) {
// TODO(TJ): convert output_ from CPU format to MKLDNN format
return output_;
} else if (MKLDNN2CPU) {
// TODO(TJ): convert output_ from MKLDNN format to CPU format
return output_;
}
// TODO(TJ): handle the MKLDNN device, or add an MKLDNN entry to outputOtherDevice_
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
if (outputOtherDevice_[i].deviceId == deviceId) {
return outputOtherDevice_[i];
......
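A caller-side view of the convention above (illustrative, not part of the patch): a layer asks its input layer for output on its own device ID; same-device requests return output_ directly, and cross-device CPU/MKLDNN requests will return a converted result once the TODOs are filled in.

// Hypothetical call site inside an MKLDNN layer; a plain CPU layer would
// pass CPU_DEVICE (or its own deviceId_) instead.
const Argument& in = inputLayers_[0]->getOutput(MKLDNN_DEVICE);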
......@@ -135,33 +135,51 @@ void MKLDNNFcLayer::reshape() {
void MKLDNNFcLayer::resetFwd() {
bool hasBias = biases_ && biases_->getW();
real* iData = getInputValue(0)->getData();
real* oData = getOutputValue()->getData();
real* wData = weight_->getW()->getData();
real* bData = hasBias ? biases_->getW()->getData() : NULL;
const MatrixPtr& in = getInputValue(0);
const MatrixPtr& wgt = weight_->getW();
const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr;
const MatrixPtr& out = output_.value;
if (getPrev(0)->getDeviceId() == MKLDNN_DEVICE) {
inVal_ = std::dynamic_pointer_cast<MKLDNNMatrix>(in);
CHECK(inVal_) << "Input should be MKLDNNMatrix";
// TODO(TJ): change input format from nchw to nc when possible, e.g.
// inVal_->downSpatial()
} else {
inVal_ = MKLDNNMatrix::create(
in,
hasSpatial_ ? memory::dims{bs_, ic_, ih_, iw_} : memory::dims{bs_, ic_},
hasSpatial_ ? format::nchw : format::nc,
engine_);
}
// TODO(TJ): below create should be covered in MkldnnMatrix
// create memory desc
memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw)
: createMD({bs_, ic_}, format::nc);
memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw)
: createMD({oc_, ic_}, format::oi);
memory::desc bMD = bData != NULL ? createMD({oc_}, format::x)
: createMD({}, format::format_undef);
memory::desc oMD = createMD({bs_, oc_}, format::nc);
wgtVal_ = MKLDNNMatrix::create(
wgt,
hasSpatial_ ? memory::dims{oc_, ic_, ih_, iw_} : memory::dims{oc_, ic_},
hasSpatial_ ? format::oihw : format::oi,
engine_);
// create memory primitive descs and the memory objects themselves
inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData));
outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData));
biasVal_ =
hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr;
outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_);
// change original output to mkldnn output
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
// create forward handle
prop_kind pk = prop_kind::forward;
fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD)
: fc_fwd::desc(pk, iMD, wMD, oMD);
fc_fwd::desc fwdDesc =
hasBias ? fc_fwd::desc(pk,
inVal_->getMD(),
wgtVal_->getMD(),
biasVal_->getMD(),
outVal_->getMD())
: fc_fwd::desc(
pk, inVal_->getMD(), wgtVal_->getMD(), outVal_->getMD());
fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
if (bData != NULL) {
biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData));
if (hasBias) {
fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_));
} else {
fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_));
......@@ -197,7 +215,8 @@ void MKLDNNFcLayer::resetBwd() {
// update data
inVal_->set_data_handle(iData);
} else {
inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
LOG(FATAL) << "Should not be empty";
// inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
}
// create memory primitive descs and the memory objects themselves
......
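The set_data_handle branch above reflects a deliberate choice: when only the underlying data pointer changes between iterations, rebinding it on the existing mkldnn::memory is cheaper than recreating the memory and the primitives built on it. A sketch of that pattern, assuming the MKLDNN 0.x API:

// Rebind the fresh input buffer for this iteration; the primitive
// descriptor, and any primitives referencing this memory, stay valid.
real* newData = getInputValue(0)->getData();
inVal_->set_data_handle(newData);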
......@@ -21,7 +21,6 @@ limitations under the License. */
#include "paddle/math/MKLDNNMatrix.h"
DECLARE_bool(use_mkldnn);
DECLARE_bool(use_mkldnn_wgt);
namespace paddle {
......@@ -54,13 +53,14 @@ protected:
std::vector<mkldnn::primitive> pipelineBwd_;
// TODO(TJ): change the memories below to MKLDNNMatrixPtr type
std::shared_ptr<mkldnn::memory> inVal_;
MKLDNNMatrixPtr inVal_;
std::shared_ptr<mkldnn::memory> inGrad_;
std::shared_ptr<mkldnn::memory> outVal_;
MKLDNNMatrixPtr outVal_;
std::shared_ptr<mkldnn::memory> outGrad_;
std::shared_ptr<mkldnn::memory> wgtVal_;
MKLDNNMatrixPtr wgtVal_;
std::shared_ptr<mkldnn::memory> wgtGrad_;
std::shared_ptr<mkldnn::memory> biasVal_;
MKLDNNMatrixPtr biasVal_;
std::shared_ptr<mkldnn::memory> biasGrad_;
public:
......@@ -94,7 +94,7 @@ public:
stream_.reset(new MKLDNNStream());
engine_ = CPUEngine::Instance().getEngine();
// TODO(TJ): deviceId
setDeviceID(MKLDNN_DEVICE);
return true;
}
......@@ -128,6 +128,19 @@ public:
// TODO(TJ): isFmtSupported(fmt)
return mkldnn::memory::desc(dims, type, fmt);
}
void resetMKLDNNOutput(size_t height, size_t width) {
Layer::resetOutput(height, width);
// get value and grad, and use MKLDNN matrices instead
// output_.value;
}
protected:
void setDeviceID(int id) {
deviceId_ = id;
output_.deviceId = id;
// TODO(TJ): handle the MKLDNN device, or add the MKLDNN device to outputOtherDevice_
}
};
} // namespace paddle
......@@ -15,13 +15,9 @@
file(GLOB MATH_HEADERS . *.h)
file(GLOB MATH_SOURCES . *.cpp)
message(STATUS "----------MATH_HEADERS:${MATH_HEADERS}")
message(STATUS "----------MATH_SOURCES:${MATH_SOURCES}")
if(NOT WITH_MKLDNN)
file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h")
file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp")
message(STATUS "----------DNN_HEADER:${DNN_HEADER}")
message(STATUS "----------DNN_SOURCES:${DNN_SOURCES}")
list(REMOVE_ITEM MATH_HEADERS ${DNN_HEADER})
list(REMOVE_ITEM MATH_SOURCES ${DNN_SOURCES})
message(STATUS "Skip compiling with MKLDNNMatrix")
......
......@@ -16,4 +16,31 @@ limitations under the License. */
using namespace mkldnn; // NOLINT
namespace paddle {} // namespace paddle
namespace paddle {
MKLDNNMatrixPtr MKLDNNMatrix::create(const MatrixPtr& m,
memory::dims dims,
memory::format fmt,
engine& eg,
mkldnn::memory::data_type dtype) {
CpuMatrixPtr cpuM = std::dynamic_pointer_cast<CpuMatrix>(m);
CHECK(cpuM) << "Only supports creating from a CPU matrix for now";
size_t ndims = dims.size();
CHECK(ndims > 0) << "Input dims should not be empty";
size_t cnt = 1;
for (size_t i = 0; i < ndims; ++i) {
cnt *= dims[i];
}
CHECK_EQ(cnt, m->getElementCnt()) << "Element count does not match";
size_t width = m->getWidth();
size_t height = m->getHeight();
real* data = m->getData();
memory::desc md = memory::desc(dims, dtype, fmt);
memory::primitive_desc pd = memory::primitive_desc(md, eg);
return std::make_shared<MKLDNNMatrix>(data, height, width, pd);
}
} // namespace paddle
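A hypothetical usage of create, assuming Paddle's standard Matrix::create(height, width, trans, useGpu) factory and an MKLDNN 0.x CPU engine; the element count of dims must match the source matrix, as the CHECKs above enforce.

// Wrap a 2x3 CPU matrix as an MKL-DNN nc-format memory over the same data.
MatrixPtr m = Matrix::create(2, 3, /*trans=*/false, /*useGpu=*/false);
mkldnn::engine eng(mkldnn::engine::cpu, 0);
MKLDNNMatrixPtr dnn =
    MKLDNNMatrix::create(m, {2, 3}, mkldnn::memory::format::nc, eng);
CHECK_EQ(dnn->getFormat(), int(mkldnn::memory::format::nc));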
......@@ -14,9 +14,8 @@ limitations under the License. */
#pragma once
//#include "Matrix.h"
#include "Vector.h"
#include <vector>
#include "Matrix.h"
#include "mkldnn.hpp"
#include "paddle/parameter/Parameter.h"
......@@ -32,14 +31,42 @@ typedef std::shared_ptr<MKLDNNMatrix> MKLDNNMatrixPtr;
* @brief MKLDNN Matrix.
*
*/
class MKLDNNMatrix : public CpuVector {
class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
public:
explicit MKLDNNMatrix(size_t size, int fmt) : CpuVector(size), fmt_(fmt) {}
MKLDNNMatrix(real* data,
size_t height,
size_t width,
mkldnn::memory::primitive_desc pd)
: CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {}
~MKLDNNMatrix() {}
MKLDNNMatrix(size_t height, size_t width, mkldnn::memory::primitive_desc pd)
: CpuMatrix(height, width, false), mkldnn::memory(pd) {
set_data_handle(CpuMatrix::getData());
}
static MKLDNNMatrixPtr create(
const MatrixPtr& m,
mkldnn::memory::dims dims,
mkldnn::memory::format fmt,
mkldnn::engine& eg,
mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32);
/**
* Get primitive descriptor
*/
mkldnn::memory::primitive_desc getPD() { return this->get_primitive_desc(); }
protected:
int fmt_;
/**
* Get memory descriptor
*/
mkldnn::memory::desc getMD() { return getPD().desc(); }
/**
* Get format
*/
int getFormat() { return getMD().data.format; }
~MKLDNNMatrix() {}
};
} // namespace paddle
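The dual inheritance above is the central design choice: a single object is both a Paddle CpuMatrix and an mkldnn::memory over the same buffer, which is what lets MKLDNNFcLayer::resetFwd() assign outVal_ straight back into output_.value. An illustrative snippet, with names assumed from this header:

// One allocation, two views: CPU layers see a Matrix, MKL-DNN primitives
// see a memory, and both operate on the same underlying data.
MKLDNNMatrixPtr dnn =
    MKLDNNMatrix::create(m, {bs, oc}, mkldnn::memory::format::nc, eng);
MatrixPtr cpuView = std::dynamic_pointer_cast<Matrix>(dnn);
const mkldnn::memory& dnnView = *dnn;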