Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
322d9ad8
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
322d9ad8
编写于
8月 30, 2017
作者:
T
Tao Luo
提交者:
GitHub
8月 30, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3712 from tensor-tang/merge
add MKLDNN_DEVICE
上级
b45d020f
c5183caa
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
609 addition
and
134 deletion
+609
-134
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+1
-1
cmake/external/mklml.cmake
cmake/external/mklml.cmake
+1
-1
paddle/gserver/layers/Layer.cpp
paddle/gserver/layers/Layer.cpp
+1
-1
paddle/gserver/layers/Layer.h
paddle/gserver/layers/Layer.h
+28
-1
paddle/gserver/layers/MKLDNNFcLayer.cpp
paddle/gserver/layers/MKLDNNFcLayer.cpp
+118
-102
paddle/gserver/layers/MKLDNNFcLayer.h
paddle/gserver/layers/MKLDNNFcLayer.h
+3
-4
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+143
-23
paddle/math/Allocator.h
paddle/math/Allocator.h
+6
-0
paddle/math/CMakeLists.txt
paddle/math/CMakeLists.txt
+11
-0
paddle/math/MKLDNNMatrix.cpp
paddle/math/MKLDNNMatrix.cpp
+144
-0
paddle/math/MKLDNNMatrix.h
paddle/math/MKLDNNMatrix.h
+148
-0
paddle/parameter/Parameter.h
paddle/parameter/Parameter.h
+5
-1
未找到文件。
cmake/external/mkldnn.cmake
浏览文件 @
322d9ad8
...
...
@@ -51,7 +51,7 @@ ExternalProject_Add(
${
EXTERNAL_PROJECT_LOG_ARGS
}
DEPENDS
${
MKLDNN_DEPENDS
}
GIT_REPOSITORY
"https://github.com/01org/mkl-dnn.git"
GIT_TAG
"v0.
9
"
GIT_TAG
"v0.
10
"
PREFIX
${
MKLDNN_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
...
...
cmake/external/mklml.cmake
浏览文件 @
322d9ad8
...
...
@@ -28,7 +28,7 @@ INCLUDE(ExternalProject)
SET
(
MKLML_PROJECT
"extern_mklml"
)
SET
(
MKLML_VER
"mklml_lnx_2018.0.20170720"
)
SET
(
MKLML_URL
"https://github.com/01org/mkl-dnn/releases/download/v0.
9
/
${
MKLML_VER
}
.tgz"
)
SET
(
MKLML_URL
"https://github.com/01org/mkl-dnn/releases/download/v0.
10
/
${
MKLML_VER
}
.tgz"
)
SET
(
MKLML_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/mklml"
)
SET
(
MKLML_DOWNLOAD_DIR
"
${
MKLML_SOURCE_DIR
}
/src/
${
MKLML_PROJECT
}
"
)
SET
(
MKLML_DST_DIR
"mklml"
)
...
...
paddle/gserver/layers/Layer.cpp
浏览文件 @
322d9ad8
...
...
@@ -41,7 +41,7 @@ namespace paddle {
Layer
::
Layer
(
const
LayerConfig
&
config
,
bool
useGpu
)
:
config_
(
config
),
useGpu_
(
useGpu
),
deviceId_
(
-
1
),
deviceId_
(
CPU_DEVICE
),
needSequenceInfo_
(
true
)
{}
bool
Layer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
...
...
paddle/gserver/layers/Layer.h
浏览文件 @
322d9ad8
...
...
@@ -59,7 +59,12 @@ protected:
LayerConfig
config_
;
/// whether to use GPU
bool
useGpu_
;
/// Device Id. CPU is -1, and GPU is 0, 1, 2 ...
/// Paddle device ID, MKLDNN is -2, CPU is -1
enum
PADDLE_DEVICE_ID
{
MKLDNN_DEVICE
=
-
2
,
CPU_DEVICE
=
-
1
,
};
/// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ...
int
deviceId_
;
/// Input layers
std
::
vector
<
LayerPtr
>
inputLayers_
;
...
...
@@ -77,6 +82,7 @@ protected:
Argument
output_
;
/// Several outputs stored on different devices, used in 'parallel_nn' case,
/// and record them by deviceId_.
/// Also used in 'use_mkldnn' case.
std
::
vector
<
Argument
>
outputOtherDevice_
;
/// If there are several outputs, map them by each name.
std
::
map
<
std
::
string
,
Argument
*>
outputMap_
;
...
...
@@ -172,6 +178,13 @@ protected:
return
inputLayer
.
getOutput
(
deviceId_
);
}
/**
 * Return the output Argument that the input layer at position inputIndex
 * exposes for the device identified by deviceId.
 *
 * @param inputIndex position of the input layer inside inputLayers_
 * @param deviceId   device id forwarded to that layer's getOutput()
 */
const Argument& getInput(size_t inputIndex, int deviceId) const {
  const LayerPtr& prevLayer = inputLayers_[inputIndex];
  return prevLayer->getOutput(deviceId);
}
/**
* Get the forward-input value.
*/
...
...
@@ -186,6 +199,13 @@ protected:
return
inputLayer
.
getOutput
(
deviceId_
).
value
;
}
/**
 * Return the forward value matrix that the input layer at position
 * inputIndex exposes for the device identified by deviceId.
 *
 * @param inputIndex position of the input layer inside inputLayers_
 * @param deviceId   device id forwarded to that layer's getOutput()
 */
const MatrixPtr& getInputValue(int inputIndex, int deviceId) {
  const Argument& prevOutput = inputLayers_[inputIndex]->getOutput(deviceId);
  return prevOutput.value;
}
/**
* Get the forward-input grad.
*/
...
...
@@ -200,6 +220,13 @@ protected:
return
inputLayer
.
getOutput
(
deviceId_
).
grad
;
}
/**
 * Return the gradient matrix that the input layer at position inputIndex
 * exposes for the device identified by deviceId.
 *
 * @param inputIndex position of the input layer inside inputLayers_
 * @param deviceId   device id forwarded to that layer's getOutput()
 */
const MatrixPtr& getInputGrad(int inputIndex, int deviceId) {
  const Argument& prevOutput = inputLayers_[inputIndex]->getOutput(deviceId);
  return prevOutput.grad;
}
/**
* Get the forward-input label.
*/
...
...
paddle/gserver/layers/MKLDNNFcLayer.cpp
浏览文件 @
322d9ad8
...
...
@@ -61,43 +61,42 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() {
return
;
}
// TODO(TJ): dst format should get from wgtVal_
int
dstFmt
=
PARAM_FORMAT_MKLDNN_OI
;
int
srcFmt
=
weight_
->
getParameterPtr
()
->
getHeaderFormat
();
if
(
srcFmt
==
dstFmt
)
{
return
;
}
// The weight_ is transposed from initial paddle weight
MatrixPtr
paddleWgt
=
Matrix
::
create
(
weight_
->
getW
()
->
getData
(),
iLayerSize_
,
oc_
,
false
,
false
);
// TODO(TJ): remove this print when do not need differ weights
std
::
ostringstream
ostr
;
paddleWgt
->
print
(
ostr
);
VLOG
(
MKLDNN_ALL
)
<<
"Initial Weight from paddle: "
<<
std
::
endl
<<
ostr
.
str
();
// The mkldnn weight is transposed from initial paddle matrix
MatrixPtr
paddleWgtT
;
paddleWgt
->
transpose
(
paddleWgtT
,
true
);
weight_
->
getW
()
->
copyFrom
(
*
paddleWgtT
);
weight_
->
getParameterPtr
()
->
setHeaderFormat
(
dstFmt
);
CHECK
(
wgtVal_
)
<<
"should have been initialized"
;
bool
hasNoSpatial_
=
ih_
==
1
&&
iw_
==
1
;
auto
targetDim
=
wgtVal_
->
getDims
();
auto
srcFmt
=
hasNoSpatial_
?
memory
::
format
::
io
:
memory
::
format
::
ihwo
;
wgtVal_
->
reorderDataFrom
(
wgtVal_
,
srcFmt
,
targetDim
);
hasInitedWgt_
=
true
;
}
void
MKLDNNFcLayer
::
convertWeightsToPaddle
()
{
MatrixPtr
dnnWgt
=
weight_
->
getW
();
MatrixPtr
paddleWgt
;
dnnWgt
->
transpose
(
paddleWgt
,
true
);
// copy paddle weight and override on weight_
MatrixPtr
dnnWgtT
=
Matrix
::
create
(
dnnWgt
->
getData
(),
dnnWgt
->
getWidth
(),
dnnWgt
->
getHeight
(),
false
,
false
);
dnnWgtT
->
copyFrom
(
*
paddleWgt
);
CHECK
(
wgtVal_
)
<<
"should have been initialized"
;
bool
hasNoSpatial_
=
ih_
==
1
&&
iw_
==
1
;
auto
targetDim
=
wgtVal_
->
getDims
();
auto
dstFmt
=
hasNoSpatial_
?
memory
::
format
::
io
:
memory
::
format
::
ihwo
;
wgtVal_
->
reorderDataTo
(
wgtVal_
,
dstFmt
,
targetDim
);
}
/**
 * Make the fc output visible to the consumers listed in outputOtherDevice_.
 *
 * First copies the frame size and sequence info of output_ to every entry
 * (copyOutputInfoToOtherDevice), then lets each CPU_DEVICE entry share the
 * value pointer of output_. A warning is logged when more than one CPU entry
 * is found.
 */
void MKLDNNFcLayer::convertOutputToOtherDevice() {
  copyOutputInfoToOtherDevice();

  // Count the CPU entries while wiring them up, so the sanity check below
  // can report an unexpected configuration.
  int cpuOutputCnt = 0;
  for (auto& other : outputOtherDevice_) {
    if (other.deviceId != CPU_DEVICE) {
      continue;
    }
    // The fc output value needs no conversion for a CPU consumer,
    // so the entry simply shares the same matrix pointer.
    other.value = output_.value;
    ++cpuOutputCnt;
  }

  if (cpuOutputCnt > 1) {
    LOG(WARNING) << "should not have more than one CPU device";
  }
}
void
MKLDNNFcLayer
::
reshape
()
{
const
Argument
&
input
=
getInput
(
0
);
const
Argument
&
input
=
getInput
(
0
,
getPrev
(
0
)
->
getDeviceId
()
);
int
batchSize
=
input
.
getBatchSize
();
if
(
bs_
==
batchSize
)
{
return
;
...
...
@@ -111,10 +110,6 @@ void MKLDNNFcLayer::reshape() {
if
(
iw_
==
0
)
{
iw_
=
1
;
}
hasSpatial_
=
true
;
if
(
ih_
==
1
&&
iw_
==
1
)
{
hasSpatial_
=
false
;
}
CHECK_EQ
(
iLayerSize_
,
inputLayers_
[
0
]
->
getSize
());
ic_
=
iLayerSize_
/
(
ih_
*
iw_
);
CHECK_EQ
(
size_t
(
ic_
*
ih_
*
iw_
),
iLayerSize_
)
<<
"not divisible"
;
...
...
@@ -135,37 +130,53 @@ void MKLDNNFcLayer::reshape() {
void
MKLDNNFcLayer
::
resetFwd
()
{
bool
hasBias
=
biases_
&&
biases_
->
getW
();
real
*
iData
=
getInputValue
(
0
)
->
getData
();
real
*
oData
=
getOutputValue
()
->
getData
();
real
*
wData
=
weight_
->
getW
()
->
getData
();
real
*
bData
=
hasBias
?
biases_
->
getW
()
->
getData
()
:
NULL
;
// TODO(TJ): below create should be covered in MkldnnMatrix
// create memory desc
memory
::
desc
iMD
=
hasSpatial_
?
createMD
({
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
)
:
createMD
({
bs_
,
ic_
},
format
::
nc
);
memory
::
desc
wMD
=
hasSpatial_
?
createMD
({
oc_
,
ic_
,
ih_
,
iw_
},
format
::
oihw
)
:
createMD
({
oc_
,
ic_
},
format
::
oi
);
memory
::
desc
bMD
=
bData
!=
NULL
?
createMD
({
oc_
},
format
::
x
)
:
createMD
({},
format
::
format_undef
);
memory
::
desc
oMD
=
createMD
({
bs_
,
oc_
},
format
::
nc
);
// create memory primitive desc and memory self
inVal_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
iMD
,
engine_
),
iData
));
wgtVal_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
wMD
,
engine_
),
wData
));
outVal_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
oMD
,
engine_
),
oData
));
const
MatrixPtr
&
wgt
=
weight_
->
getW
();
const
MatrixPtr
&
bias
=
hasBias
?
biases_
->
getW
()
:
nullptr
;
const
MatrixPtr
&
out
=
output_
.
value
;
if
(
inputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
in
=
getInputValue
(
0
);
inVal_
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
in
);
CHECK
(
inVal_
)
<<
"Input should be MKLDNNMatrix"
;
}
else
{
CHECK_EQ
(
getPrev
(
0
)
->
getDeviceId
(),
CPU_DEVICE
)
<<
"Only support CPU yet"
;
const
MatrixPtr
&
in
=
getInputValue
(
0
,
CPU_DEVICE
);
inVal_
=
MKLDNNMatrix
::
create
(
in
,
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
}
inVal_
->
downSpatial
();
wgtVal_
=
MKLDNNMatrix
::
create
(
wgt
,
memory
::
dims
{
oc_
,
ic_
,
ih_
,
iw_
},
format
::
oihw
,
engine_
);
wgtVal_
->
downSpatial
();
biasVal_
=
hasBias
?
MKLDNNMatrix
::
create
(
bias
,
{
oc_
},
format
::
x
,
engine_
)
:
nullptr
;
outVal_
=
MKLDNNMatrix
::
create
(
out
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
// change original output value to mkldnn output value
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
outVal_
);
if
(
!
outputIsOnlyMKLDNN
())
{
convertOutputToOtherDevice
();
}
// create forward handle
prop_kind
pk
=
prop_kind
::
forward
;
fc_fwd
::
desc
fwdDesc
=
bData
!=
NULL
?
fc_fwd
::
desc
(
pk
,
iMD
,
wMD
,
bMD
,
oMD
)
:
fc_fwd
::
desc
(
pk
,
iMD
,
wMD
,
oMD
);
fc_fwd
::
desc
fwdDesc
=
hasBias
?
fc_fwd
::
desc
(
pk
,
inVal_
->
getMemoryDesc
(),
wgtVal_
->
getMemoryDesc
(),
biasVal_
->
getMemoryDesc
(),
outVal_
->
getMemoryDesc
())
:
fc_fwd
::
desc
(
pk
,
inVal_
->
getMemoryDesc
(),
wgtVal_
->
getMemoryDesc
(),
outVal_
->
getMemoryDesc
());
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
if
(
bData
!=
NULL
)
{
biasVal_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
bMD
,
engine_
),
bData
));
if
(
hasBias
)
{
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
inVal_
,
*
wgtVal_
,
*
biasVal_
,
*
outVal_
));
}
else
{
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
inVal_
,
*
wgtVal_
,
*
outVal_
));
}
printValueFormatFlow
();
pipelineFwd_
.
clear
();
pipelineFwd_
.
push_back
(
*
fwd_
);
}
...
...
@@ -175,45 +186,46 @@ void MKLDNNFcLayer::resetBwd() {
return
;
}
needResetBwd_
=
false
;
bool
hasBias
=
biases_
&&
biases_
->
getWGrad
();
real
*
iData
=
getInputValue
(
0
)
->
getData
();
real
*
iDiff
=
getInputGrad
(
0
)
!=
nullptr
?
getInputGrad
(
0
)
->
getData
()
:
NULL
;
real
*
oDiff
=
getOutputGrad
()
->
getData
();
real
*
wDiff
=
weight_
->
getWGrad
()
->
getData
();
real
*
bDiff
=
hasBias
?
biases_
->
getWGrad
()
->
getData
()
:
NULL
;
/// backward weight
// create memory desc for backward memory
memory
::
desc
iMD
=
hasSpatial_
?
createMD
({
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
)
:
createMD
({
bs_
,
ic_
},
format
::
nc
);
memory
::
desc
wMD
=
hasSpatial_
?
createMD
({
oc_
,
ic_
,
ih_
,
iw_
},
format
::
oihw
)
:
createMD
({
oc_
,
ic_
},
format
::
oi
);
memory
::
desc
oMD
=
createMD
({
bs_
,
oc_
},
format
::
nc
);
memory
::
desc
bMD
=
bDiff
!=
NULL
?
createMD
({
oc_
},
format
::
x
)
:
createMD
({},
format
::
format_undef
);
if
(
inVal_
)
{
// update data
inVal_
->
set_data_handle
(
iData
);
}
else
{
inVal_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
iMD
,
engine_
),
iData
));
}
// create memory primitive desc and memory self
wgtGrad_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
wMD
,
engine_
),
wDiff
));
outGrad_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
oMD
,
engine_
),
oDiff
));
fc_fwd
::
desc
fwdDesc
=
fc_fwd
::
desc
(
prop_kind
::
forward
,
iMD
,
wMD
,
oMD
);
CHECK
(
inVal_
)
<<
"Should have input value"
;
const
MatrixPtr
&
wgt
=
weight_
->
getWGrad
();
const
MatrixPtr
&
bias
=
hasBias
?
biases_
->
getWGrad
()
:
nullptr
;
// TODO(TJ): merge outgrad
int
device
=
outputIsOnlyMKLDNN
()
?
MKLDNN_DEVICE
:
CPU_DEVICE
;
// for MKLDNN device:
// can not directly cast outputgrad to mkldnnmatrix,
// since each layer can not write the inputgrad to mkldnn inputgrad.
// So just create from matrix with outputvalue format.
// for CPU device:
// fc do not need to convert from cpu device since output is always nc format
// only need create from cpu device
const
MatrixPtr
&
out
=
getOutput
(
device
).
grad
;
outGrad_
=
MKLDNNMatrix
::
create
(
out
,
outVal_
->
getPrimitiveDesc
());
wgtGrad_
=
MKLDNNMatrix
::
create
(
wgt
,
wgtVal_
->
getPrimitiveDesc
());
biasGrad_
=
hasBias
?
MKLDNNMatrix
::
create
(
bias
,
biasVal_
->
getPrimitiveDesc
())
:
nullptr
;
// create memory primitive desc
fc_fwd
::
desc
fwdDesc
=
fc_fwd
::
desc
(
prop_kind
::
forward
,
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
());
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
fc_bwdWgt
::
desc
bwdWgtDesc
=
bDiff
!=
NULL
?
fc_bwdWgt
::
desc
(
iMD
,
wMD
,
bMD
,
oMD
)
:
fc_bwdWgt
::
desc
(
iMD
,
wMD
,
oMD
);
fc_bwdWgt
::
desc
bwdWgtDesc
=
hasBias
?
fc_bwdWgt
::
desc
(
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
biasGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
())
:
fc_bwdWgt
::
desc
(
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
());
fc_bwdWgt
::
primitive_desc
bwdWgtPD
=
fc_bwdWgt
::
primitive_desc
(
bwdWgtDesc
,
engine_
,
fwdPD
);
if
(
bDiff
!=
NULL
)
{
biasGrad_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
bMD
,
engine_
),
bDiff
));
if
(
hasBias
)
{
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
bwdWgtPD
,
*
inVal_
,
*
outGrad_
,
*
wgtGrad_
,
*
biasGrad_
));
}
else
{
...
...
@@ -223,15 +235,26 @@ void MKLDNNFcLayer::resetBwd() {
pipelineBwd_
.
push_back
(
*
bwdWgt_
);
/// backward data
if
(
iDiff
==
NULL
)
{
device
=
inputIsOnlyMKLDNN
()
?
MKLDNN_DEVICE
:
CPU_DEVICE
;
const
MatrixPtr
&
in
=
getInputGrad
(
0
,
device
);
if
(
in
==
nullptr
)
{
return
;
}
fc_bwdData
::
desc
bwdDataDesc
=
fc_bwdData
::
desc
(
iMD
,
wMD
,
oMD
);
if
(
getInput
(
0
,
device
).
getAllCount
()
>
1
)
{
// TODO(TJ): use outputMaps_ ways when merge outgrad done
}
else
{
inGrad_
=
MKLDNNMatrix
::
create
(
in
,
inVal_
->
getPrimitiveDesc
());
}
fc_bwdData
::
desc
bwdDataDesc
=
fc_bwdData
::
desc
(
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
());
fc_bwdData
::
primitive_desc
bwdDataPD
=
fc_bwdData
::
primitive_desc
(
bwdDataDesc
,
engine_
,
fwdPD
);
inGrad_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
iMD
,
engine_
),
iDiff
));
CHECK
(
wgtVal_
)
<<
"Should have weight memory"
;
bwdData_
.
reset
(
new
fc_bwdData
(
bwdDataPD
,
*
outGrad_
,
*
wgtVal_
,
*
inGrad_
));
printGradFormatFlow
();
pipelineBwd_
.
push_back
(
*
bwdData_
);
}
...
...
@@ -241,11 +264,7 @@ void MKLDNNFcLayer::forward(PassType passType) {
{
REGISTER_TIMER_INFO
(
"mkldnn_FwdTimer"
,
getName
().
c_str
());
// update input data
// since it might be changed if this is after data layer
real
*
iData
=
getInputValue
(
0
)
->
getData
();
inVal_
->
set_data_handle
(
iData
);
syncInputValue
();
// just submit forward pipeline
stream_
->
submit
(
pipelineFwd_
);
...
...
@@ -267,10 +286,7 @@ void MKLDNNFcLayer::backward(const UpdateCallback& callback) {
REGISTER_TIMER_INFO
(
"mkldnn_bwdTimer"
,
getName
().
c_str
());
resetBwd
();
// update diff
real
*
oDiff
=
getOutputGrad
()
->
getData
();
outGrad_
->
set_data_handle
(
oDiff
);
syncOutputGrad
();
// just submit backward pipeline
stream_
->
submit
(
pipelineBwd_
);
}
...
...
paddle/gserver/layers/MKLDNNFcLayer.h
浏览文件 @
322d9ad8
...
...
@@ -32,16 +32,13 @@ protected:
// if has already init the weight
bool
hasInitedWgt_
;
// if input layer has image size info (ih>1 && iw>1)
bool
hasSpatial_
;
// fc weight and bias
std
::
unique_ptr
<
Weight
>
weight_
;
std
::
unique_ptr
<
Weight
>
biases_
;
public:
explicit
MKLDNNFcLayer
(
const
LayerConfig
&
config
)
:
MKLDNNLayer
(
config
),
hasInitedWgt_
(
false
)
,
hasSpatial_
(
true
)
{}
:
MKLDNNLayer
(
config
),
hasInitedWgt_
(
false
)
{}
~
MKLDNNFcLayer
()
{}
...
...
@@ -75,6 +72,8 @@ protected:
* only would be called when needed
*/
void
resetBwd
();
void
convertOutputToOtherDevice
()
override
;
};
}
// namespace paddle
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
322d9ad8
...
...
@@ -18,9 +18,9 @@ limitations under the License. */
#include "Layer.h"
#include "MKLDNNBase.h"
#include "mkldnn.hpp"
#include "paddle/math/MKLDNNMatrix.h"
DECLARE_bool
(
use_mkldnn
);
DECLARE_bool
(
use_mkldnn_wgt
);
namespace
paddle
{
...
...
@@ -52,15 +52,15 @@ protected:
std
::
vector
<
mkldnn
::
primitive
>
pipelineFwd_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineBwd_
;
//
TODO(TJ): change below memory as MKLDNNMatrixPtr type
std
::
shared_ptr
<
mkldnn
::
memory
>
inVal_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
inGrad_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
outVal_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
outGrad_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
wgtVal_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
wgtGrad_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
biasVal_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
biasGrad_
;
//
MKLDNNMatrixPtr
MKLDNNMatrixPtr
inVal_
;
MKLDNNMatrixPtr
inGrad_
;
MKLDNNMatrixPtr
outVal_
;
MKLDNNMatrixPtr
outGrad_
;
MKLDNNMatrixPtr
wgtVal_
;
MKLDNNMatrixPtr
wgtGrad_
;
MKLDNNMatrixPtr
biasVal_
;
MKLDNNMatrixPtr
biasGrad_
;
public:
explicit
MKLDNNLayer
(
const
LayerConfig
&
config
)
...
...
@@ -83,17 +83,21 @@ public:
virtual
bool
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
<<
"Please set WITH_MKLDNN=ON "
<<
"and set use_mkldnn=True"
;
CHECK
(
!
useGpu_
)
<<
"Do not support GPU yet"
;
// set device id before Layer::init
setDevice
(
MKLDNN_DEVICE
);
// change param device to MKLDNN device
setParamsDevice
(
MKLDNN_DEVICE
,
parameterMap
);
if
(
!
Layer
::
init
(
layerMap
,
parameterMap
))
{
return
false
;
}
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
<<
"Please set WITH_MKLDNN=ON "
<<
"and set use_mkldnn=True"
;
stream_
.
reset
(
new
MKLDNNStream
());
engine_
=
CPUEngine
::
Instance
().
getEngine
();
// TODO(TJ): deviceId
return
true
;
}
...
...
@@ -109,6 +113,12 @@ public:
*/
virtual
void
convertWeightsToPaddle
()
{}
/**
* convert MKLDNN output to other device.
* only support CPU device yet
*/
virtual
void
convertOutputToOtherDevice
()
{}
/**
* print info about sizes
*/
...
...
@@ -118,14 +128,124 @@ public:
<<
", oh: "
<<
oh_
<<
", ow: "
<<
ow_
;
}
// TODO(TJ): move to MkldnnMatrix
// create memory desc
inline
mkldnn
::
memory
::
desc
createMD
(
mkldnn
::
memory
::
dims
dims
,
mkldnn
::
memory
::
format
fmt
,
mkldnn
::
memory
::
data_type
type
=
mkldnn
::
memory
::
data_type
::
f32
)
{
// TODO(TJ): isFmtSuppoted(fmt)
return
mkldnn
::
memory
::
desc
(
dims
,
type
,
fmt
);
/**
 * Log (at verbosity MKLDNN_FMTS) the memory format of the input value
 * followed by the memory format of the output value.
 * Nothing is logged unless both inVal_ and outVal_ are set.
 */
virtual void printValueFormatFlow() {
  if (!inVal_ || !outVal_) {
    return;
  }
  VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat()
                    << " >>> " << outVal_->getFormat();
}
/**
 * Log (at verbosity MKLDNN_FMTS) the memory format of the input grad and
 * the memory format of the output grad.
 * Nothing is logged unless both inGrad_ and outGrad_ are set.
 */
virtual void printGradFormatFlow() {
  if (!inGrad_ || !outGrad_) {
    return;
  }
  VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat()
                    << " <<< " << outGrad_->getFormat();
}
protected:
/**
 * Mirror the frame size and sequence information of output_ onto every
 * entry of outputOtherDevice_.
 * @note: Layer::copyOutputToOtherDevice is deliberately not used here,
 *        because only this base info is wanted and the data value must
 *        not be copied.
 */
void copyOutputInfoToOtherDevice() {
  for (auto& other : outputOtherDevice_) {
    other.setFrameHeight(output_.getFrameHeight());
    other.setFrameWidth(output_.getFrameWidth());
    other.sequenceStartPositions = output_.sequenceStartPositions;
    other.subSequenceStartPositions = output_.subSequenceStartPositions;
    other.cpuSequenceDims = output_.cpuSequenceDims;
  }
}
/**
 * Tell whether the previous layer at the given index runs on the MKLDNN
 * device.
 * Any other previous device must be the CPU device; the check aborts
 * otherwise, since GPU is not supported yet.
 *
 * @param index position of the previous layer to inspect
 * @return true for MKLDNN_DEVICE, false for CPU_DEVICE
 */
bool inputIsOnlyMKLDNN(int index = 0) {
  int prevDevice = getPrev(index)->getDeviceId();
  if (prevDevice != MKLDNN_DEVICE) {
    // do not support GPU yet
    CHECK_EQ(prevDevice, CPU_DEVICE) << "Only support CPU yet";
    return false;
  }
  return true;
}
/**
 * Tell whether the output is consumed on the MKLDNN device only, i.e.
 * outputOtherDevice_ has no entries.
 * Every entry that does exist must belong to the CPU device; the check
 * aborts otherwise.
 */
bool outputIsOnlyMKLDNN() {
  const size_t otherCnt = outputOtherDevice_.size();
  for (size_t i = 0; i != otherCnt; ++i) {
    CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE)
        << "Only support other device is CPU yet";
  }
  return otherCnt == 0;
}
/**
* Sync input value data
*/
void
syncInputValue
()
{
if
(
inputIsOnlyMKLDNN
())
{
return
;
}
real
*
iData
=
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
();
// update input data
// since it might be changed if this is after data layer
inVal_
->
updateData
(
iData
);
}
/**
* Sync output grad data
*/
void
syncOutputGrad
()
{
if
(
outputIsOnlyMKLDNN
())
{
return
;
}
// update diff
real
*
oDiff
=
getOutput
(
CPU_DEVICE
).
grad
->
getData
();
outGrad_
->
updateData
(
oDiff
);
}
/**
* Set deviceId of this layer.
*/
void
setDevice
(
int
id
)
{
deviceId_
=
id
;
}
/**
* Set deviceId of the params used in this layer.
*/
void
setParamsDevice
(
int
id
,
const
ParameterMap
&
parameterMap
)
{
for
(
auto
&
inputConfig
:
config_
.
inputs
())
{
if
(
inputConfig
.
has_input_parameter_name
())
{
ParameterPtr
parameter
;
std
::
string
name
=
inputConfig
.
input_parameter_name
();
CHECK
(
mapGet
(
name
,
parameterMap
,
&
parameter
))
<<
"Cannot find input parameter "
<<
name
<<
" for layer "
<<
getName
();
parameter
->
setDevice
(
id
);
}
}
if
(
config_
.
has_bias_parameter_name
())
{
ParameterPtr
parameter
;
std
::
string
name
=
config_
.
bias_parameter_name
();
CHECK
(
mapGet
(
name
,
parameterMap
,
&
parameter
))
<<
"Cannot find bias parameter "
<<
name
<<
" for layer "
<<
getName
();
parameter
->
setDevice
(
id
);
}
}
};
...
...
paddle/math/Allocator.h
浏览文件 @
322d9ad8
...
...
@@ -48,7 +48,13 @@ public:
*/
/**
 * Allocate size bytes with posix_memalign and return the pointer.
 * The requested alignment is 4096 bytes when PADDLE_USE_MKLDNN is defined
 * and 32 bytes otherwise. Aborts when the allocation call reports an error
 * or yields a null pointer.
 */
virtual void* alloc(size_t size) {
  void* ptr = nullptr;
#ifdef PADDLE_USE_MKLDNN
  // MKL-DNN memory alignment, refer to
  // https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
  CHECK_EQ(posix_memalign(&ptr, 4096ul, size), 0);
#else
  CHECK_EQ(posix_memalign(&ptr, 32ul, size), 0);
#endif
  CHECK(ptr) << "Fail to allocate CPU memory: size=" << size;
  return ptr;
}
...
...
paddle/math/CMakeLists.txt
浏览文件 @
322d9ad8
...
...
@@ -14,6 +14,17 @@
#
file
(
GLOB MATH_HEADERS . *.h
)
file
(
GLOB MATH_SOURCES . *.cpp
)
# MKLDNNMatrix is only built together with MKL-DNN: when WITH_MKLDNN is off,
# drop its header and source from the globbed file lists.
if(WITH_MKLDNN)
    message(STATUS "Compile with MKLDNNMatrix")
else()
    set(DNN_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/MKLDNNMatrix.h")
    set(DNN_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/MKLDNNMatrix.cpp")
    list(REMOVE_ITEM MATH_HEADERS "${DNN_HEADER}")
    list(REMOVE_ITEM MATH_SOURCES "${DNN_SOURCE}")
    message(STATUS "Skip compiling with MKLDNNMatrix")
endif()
set
(
MATH_SOURCES
"
${
PADDLE_SOURCE_DIR
}
/paddle/math/BaseMatrix.cu"
"
${
PADDLE_SOURCE_DIR
}
/paddle/math/TrainingAlgorithmOp.cu"
...
...
paddle/math/MKLDNNMatrix.cpp
0 → 100644
浏览文件 @
322d9ad8
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNMatrix.h"
using
namespace
mkldnn
;
// NOLINT
namespace
paddle
{
/**
 * Build an MKLDNNMatrix described by pd on top of the buffer of m.
 *
 * When m is null, a new matrix with dims[0] rows and (element count / dims[0])
 * columns is allocated first. Otherwise m has to be a CpuMatrix whose element
 * count equals the product of the dims in pd; both conditions are checked.
 *
 * The MKLDNNMatrix itself only receives the raw data pointer of m, so the
 * returned shared pointer also keeps the backing matrix alive. Without that,
 * a matrix allocated inside this function would be released on return and
 * leave the result pointing at freed memory.
 *
 * @param m   matrix supplying the buffer, or nullptr to allocate one
 * @param pd  memory primitive descriptor (dims, data type, format, engine)
 * @return    the new MKLDNNMatrix
 */
MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
  memory::desc md = pd.desc();
  size_t ndims = md.data.ndims;
  int* dims = md.data.dims;
  CHECK(ndims > 0) << "Input dims should not be empty";

  // total number of elements described by pd
  size_t cnts = 1;
  for (size_t i = 0; i < ndims; ++i) {
    cnts *= dims[i];
  }

  if (m == nullptr) {
    // No buffer supplied: allocate a dims[0] x (cnts / dims[0]) matrix.
    size_t height = dims[0];
    size_t width = cnts / dims[0];
    m = Matrix::create(height, width, false, false);
  }

  CHECK(m) << " Matrix should not be empty";
  CpuMatrixPtr cpuMatrix = std::dynamic_pointer_cast<CpuMatrix>(m);
  CHECK(cpuMatrix) << "Only support create from CPU matrix yet";
  CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match";

  // The deleter holds a reference to the backing matrix so that its buffer
  // lives at least as long as the MKLDNNMatrix that points into it.
  return MKLDNNMatrixPtr(
      new MKLDNNMatrix(m->getData(), m->getHeight(), m->getWidth(), pd),
      [cpuMatrix](MKLDNNMatrix* p) {
        (void)cpuMatrix;
        delete p;
      });
}
/**
 * Convenience overload: assemble the memory primitive descriptor from
 * dims, dtype, fmt and the engine, then delegate to
 * create(MatrixPtr, memory::primitive_desc).
 */
MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
                                     memory::dims dims,
                                     memory::format fmt,
                                     engine& eg,
                                     mkldnn::memory::data_type dtype) {
  memory::desc md(dims, dtype, fmt);
  memory::primitive_desc pd(md, eg);
  return create(m, pd);
}
/**
 * Fill this matrix with the data of m, converting the layout from srcFmt
 * to the format of this matrix.
 *
 * m may be this matrix itself (in-place reorder). Only the data layout is
 * touched; dims and format info of this matrix stay as they are.
 *
 * @note When srcFmt already equals the format of this matrix the function
 *       returns immediately and no data is copied, even if m is a
 *       different matrix.
 *
 * @param m          matrix holding the data laid out as srcFmt
 * @param srcFmt     layout of the data in m
 * @param targetDim  dims used to describe both sides of the reorder
 */
void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m,
                                   memory::format srcFmt,
                                   memory::dims targetDim) {
  memory::format dstFmt = getFormat();
  if (srcFmt == dstFmt) {
    return;
  }
  CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal";
  // reorderOnce takes (source, destination): the data is read from m and
  // written into this matrix, whose format is dstFmt.
  reorderOnce(m->getData(), getData(), srcFmt, dstFmt, targetDim);
}
void
MKLDNNMatrix
::
reorderDataTo
(
const
MKLDNNMatrixPtr
&
m
,
memory
::
format
dstFmt
,
memory
::
dims
targetDim
)
{
memory
::
format
srcFmt
=
getFormat
();
if
(
srcFmt
==
dstFmt
)
{
return
;
}
CHECK_EQ
(
getElementCnt
(),
m
->
getElementCnt
())
<<
"size should equal"
;
reorderOnce
(
getData
(),
m
->
getData
(),
srcFmt
,
dstFmt
,
targetDim
);
}
void
MKLDNNMatrix
::
reorderOnce
(
void
*
srcData
,
void
*
dstData
,
memory
::
format
srcFmt
,
memory
::
format
dstFmt
,
memory
::
dims
dm
)
{
CHECK
(
srcData
);
CHECK
(
dstData
);
MatrixPtr
tmpSrc
;
if
(
dstData
==
srcData
)
{
// inplace data
size_t
sz
=
1
;
for
(
size_t
i
=
0
;
i
<
dm
.
size
();
++
i
)
{
sz
*=
dm
[
i
];
}
tmpSrc
=
Matrix
::
create
(
sz
,
1
,
false
,
false
);
tmpSrc
->
copyFrom
((
real
*
)
srcData
,
sz
);
srcData
=
tmpSrc
->
getData
();
}
auto
dtype
=
this
->
getDtype
();
auto
srcMD
=
memory
::
desc
(
dm
,
dtype
,
srcFmt
);
auto
dstMD
=
memory
::
desc
(
dm
,
dtype
,
dstFmt
);
auto
eg
=
this
->
getEngine
();
auto
src
=
memory
(
memory
::
primitive_desc
(
srcMD
,
eg
),
srcData
);
auto
dst
=
memory
(
memory
::
primitive_desc
(
dstMD
,
eg
),
dstData
);
auto
r
=
reorder
(
src
,
dst
);
stream
(
stream
::
kind
::
eager
).
submit
({
r
}).
wait
();
}
void
MKLDNNMatrix
::
downSpatial
()
{
int
fmt
=
getFormat
();
if
(
!
(
fmt
==
memory
::
format
::
nchw
||
fmt
==
memory
::
format
::
oihw
))
{
// only support nchw and oihw yet, later can support more like nhwc, ihwo
return
;
}
// TODO(TJ): change H(height) and W(width) if support nhwc or more
const
int
H
=
2
,
W
=
3
;
memory
::
dims
srcDims
=
getDims
();
if
(
srcDims
[
H
]
!=
1
||
srcDims
[
W
]
!=
1
)
{
// can not down spatial
return
;
}
memory
::
dims
dstDims
=
memory
::
dims
{
srcDims
[
0
],
srcDims
[
1
]};
memory
::
format
dstFmt
;
switch
(
fmt
)
{
case
memory
::
format
::
nchw
:
dstFmt
=
memory
::
format
::
nc
;
break
;
case
memory
::
format
::
oihw
:
dstFmt
=
memory
::
format
::
oi
;
break
;
default:
LOG
(
FATAL
)
<<
"unsupported format"
;
}
memory
::
desc
md
=
memory
::
desc
(
dstDims
,
getDtype
(),
dstFmt
);
memory
::
primitive_desc
pd
=
memory
::
primitive_desc
(
md
,
getEngine
());
mkldnn_primitive_t
result
;
mkldnn
::
error
::
wrap_c_api
(
mkldnn_primitive_create
(
&
result
,
pd
.
get
(),
nullptr
,
nullptr
),
"could not create a memory primitive"
);
reset
(
result
);
set_data_handle
(
getData
());
}
}
// namespace paddle
paddle/math/MKLDNNMatrix.h
0 → 100644
浏览文件 @
322d9ad8
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "Matrix.h"
#include "mkldnn.hpp"
#include "paddle/parameter/Parameter.h"
namespace
paddle
{
class
MKLDNNMatrix
;
typedef
std
::
shared_ptr
<
MKLDNNMatrix
>
MKLDNNMatrixPtr
;
/**
 * @brief MKLDNN Matrix.
 *
 * A CpuMatrix that is at the same time an mkldnn::memory. Both base classes
 * are constructed over the same data pointer.
 */
class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
public:
  /**
   * @param data    buffer handed to both the CpuMatrix and the memory base
   * @param height  matrix height
   * @param width   matrix width
   * @param pd      memory primitive descriptor for the memory base
   */
  MKLDNNMatrix(real* data,
               size_t height,
               size_t width,
               mkldnn::memory::primitive_desc pd)
      : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {}

  ~MKLDNNMatrix() {}

  /**
   * Create MKLDNNMatrix from a MatrixPtr and a memory primitive_desc.
   */
  static MKLDNNMatrixPtr create(MatrixPtr m,
                                mkldnn::memory::primitive_desc pd);

  /**
   * Create MKLDNNMatrix from a MatrixPtr and the individual memory details
   * (dims, format, engine and data type, which defaults to f32).
   */
  static MKLDNNMatrixPtr create(
      MatrixPtr m,
      mkldnn::memory::dims dims,
      mkldnn::memory::format fmt,
      mkldnn::engine& eg,
      mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32);

public:
  /**
   * Reorder this MKLDNNMatrix from another format.
   * In-place reorder is supported.
   * @note: only the data layout is reordered; the dim and format info of
   *        this matrix are NOT changed.
   */
  void reorderDataFrom(const MKLDNNMatrixPtr& m,
                       memory::format srcFmt,
                       memory::dims targetDim);

  /**
   * Reorder this MKLDNNMatrix to another format.
   * In-place reorder is supported.
   * @note: only the data layout is reordered; the dim and format info of
   *        the destination are NOT changed.
   */
  void reorderDataTo(const MKLDNNMatrixPtr& m,
                     memory::format dstFmt,
                     memory::dims targetDim);

  /**
   * Dimensionality reduction.
   * Change format "nchw --> nc" or "oihw --> oi" if the h and w are both 1.
   */
  void downSpatial();

  /**
   * Update the memory data handle.
   * Caution: the buffer size of data is not checked here;
   * the caller is responsible for it.
   */
  void updateData(void* data) { set_data_handle(data); }

  /**
   * Get the memory primitive descriptor.
   */
  mkldnn::memory::primitive_desc getPrimitiveDesc() {
    return this->get_primitive_desc();
  }

  /**
   * Get the memory descriptor.
   */
  mkldnn::memory::desc getMemoryDesc() { return getPrimitiveDesc().desc(); }

  /**
   * Get the dimensions as a copy of the dims stored in the memory
   * descriptor.
   */
  mkldnn::memory::dims getDims() {
    mkldnn::memory::desc md = getMemoryDesc();
    const int* first = md.data.dims;
    const int* last = first + md.data.ndims;
    return mkldnn::memory::dims(first, last);
  }

  /**
   * Get the memory format.
   */
  mkldnn::memory::format getFormat() {
    return static_cast<mkldnn::memory::format>(getMemoryDesc().data.format);
  }

  /**
   * Get the memory data type.
   */
  mkldnn::memory::data_type getDtype() {
    return static_cast<mkldnn::memory::data_type>(
        getMemoryDesc().data.data_type);
  }

  /**
   * Get the engine.
   */
  mkldnn::engine getEngine() { return getPrimitiveDesc().get_engine(); }

protected:
  /**
   * Do one reorder from srcData to dstData.
   * In-place use (srcData == dstData) is supported.
   */
  void reorderOnce(void* srcData,
                   void* dstData,
                   memory::format srcFmt,
                   memory::format dstFmt,
                   memory::dims dm);
};
}
// namespace paddle
paddle/parameter/Parameter.h
浏览文件 @
322d9ad8
...
...
@@ -281,7 +281,11 @@ public:
/**
* @brief Set the format in header.
*/
void
setHeaderFormat
(
int32_t
fmt
)
{
headerFormat_
=
fmt
;
}
void
setHeaderFormat
(
int32_t
fmt
)
{
CHECK
(
isHeaderFormatSupported
(
fmt
))
<<
"Unsupported format version: "
<<
fmt
;
headerFormat_
=
fmt
;
}
/**
* @brief Parameter Update Hook.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录