Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
322d9ad8
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
322d9ad8
编写于
8月 30, 2017
作者:
T
Tao Luo
提交者:
GitHub
8月 30, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3712 from tensor-tang/merge
add MKLDNN_DEVICE
上级
b45d020f
c5183caa
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
609 addition
and
134 deletion
+609
-134
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+1
-1
cmake/external/mklml.cmake
cmake/external/mklml.cmake
+1
-1
paddle/gserver/layers/Layer.cpp
paddle/gserver/layers/Layer.cpp
+1
-1
paddle/gserver/layers/Layer.h
paddle/gserver/layers/Layer.h
+28
-1
paddle/gserver/layers/MKLDNNFcLayer.cpp
paddle/gserver/layers/MKLDNNFcLayer.cpp
+118
-102
paddle/gserver/layers/MKLDNNFcLayer.h
paddle/gserver/layers/MKLDNNFcLayer.h
+3
-4
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+143
-23
paddle/math/Allocator.h
paddle/math/Allocator.h
+6
-0
paddle/math/CMakeLists.txt
paddle/math/CMakeLists.txt
+11
-0
paddle/math/MKLDNNMatrix.cpp
paddle/math/MKLDNNMatrix.cpp
+144
-0
paddle/math/MKLDNNMatrix.h
paddle/math/MKLDNNMatrix.h
+148
-0
paddle/parameter/Parameter.h
paddle/parameter/Parameter.h
+5
-1
未找到文件。
cmake/external/mkldnn.cmake
浏览文件 @
322d9ad8
...
@@ -51,7 +51,7 @@ ExternalProject_Add(
...
@@ -51,7 +51,7 @@ ExternalProject_Add(
${
EXTERNAL_PROJECT_LOG_ARGS
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
DEPENDS
${
MKLDNN_DEPENDS
}
DEPENDS
${
MKLDNN_DEPENDS
}
GIT_REPOSITORY
"https://github.com/01org/mkl-dnn.git"
GIT_REPOSITORY
"https://github.com/01org/mkl-dnn.git"
GIT_TAG
"v0.
9
"
GIT_TAG
"v0.
10
"
PREFIX
${
MKLDNN_SOURCES_DIR
}
PREFIX
${
MKLDNN_SOURCES_DIR
}
UPDATE_COMMAND
""
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
...
...
cmake/external/mklml.cmake
浏览文件 @
322d9ad8
...
@@ -28,7 +28,7 @@ INCLUDE(ExternalProject)
...
@@ -28,7 +28,7 @@ INCLUDE(ExternalProject)
SET
(
MKLML_PROJECT
"extern_mklml"
)
SET
(
MKLML_PROJECT
"extern_mklml"
)
SET
(
MKLML_VER
"mklml_lnx_2018.0.20170720"
)
SET
(
MKLML_VER
"mklml_lnx_2018.0.20170720"
)
SET
(
MKLML_URL
"https://github.com/01org/mkl-dnn/releases/download/v0.
9
/
${
MKLML_VER
}
.tgz"
)
SET
(
MKLML_URL
"https://github.com/01org/mkl-dnn/releases/download/v0.
10
/
${
MKLML_VER
}
.tgz"
)
SET
(
MKLML_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/mklml"
)
SET
(
MKLML_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/mklml"
)
SET
(
MKLML_DOWNLOAD_DIR
"
${
MKLML_SOURCE_DIR
}
/src/
${
MKLML_PROJECT
}
"
)
SET
(
MKLML_DOWNLOAD_DIR
"
${
MKLML_SOURCE_DIR
}
/src/
${
MKLML_PROJECT
}
"
)
SET
(
MKLML_DST_DIR
"mklml"
)
SET
(
MKLML_DST_DIR
"mklml"
)
...
...
paddle/gserver/layers/Layer.cpp
浏览文件 @
322d9ad8
...
@@ -41,7 +41,7 @@ namespace paddle {
...
@@ -41,7 +41,7 @@ namespace paddle {
Layer
::
Layer
(
const
LayerConfig
&
config
,
bool
useGpu
)
Layer
::
Layer
(
const
LayerConfig
&
config
,
bool
useGpu
)
:
config_
(
config
),
:
config_
(
config
),
useGpu_
(
useGpu
),
useGpu_
(
useGpu
),
deviceId_
(
-
1
),
deviceId_
(
CPU_DEVICE
),
needSequenceInfo_
(
true
)
{}
needSequenceInfo_
(
true
)
{}
bool
Layer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
bool
Layer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
...
...
paddle/gserver/layers/Layer.h
浏览文件 @
322d9ad8
...
@@ -59,7 +59,12 @@ protected:
...
@@ -59,7 +59,12 @@ protected:
LayerConfig
config_
;
LayerConfig
config_
;
/// whether to use GPU
/// whether to use GPU
bool
useGpu_
;
bool
useGpu_
;
/// Device Id. CPU is -1, and GPU is 0, 1, 2 ...
/// Paddle device ID, MKLDNN is -2, CPU is -1
// Named constants for the negative (non-GPU) device ids.
// GPU devices use the non-negative ids 0, 1, 2 ...
enum
PADDLE_DEVICE_ID
{
// Device id used by MKLDNN layers.
MKLDNN_DEVICE
=
-
2
,
// Device id used for plain CPU.
CPU_DEVICE
=
-
1
,
};
/// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ...
int
deviceId_
;
int
deviceId_
;
/// Input layers
/// Input layers
std
::
vector
<
LayerPtr
>
inputLayers_
;
std
::
vector
<
LayerPtr
>
inputLayers_
;
...
@@ -77,6 +82,7 @@ protected:
...
@@ -77,6 +82,7 @@ protected:
Argument
output_
;
Argument
output_
;
/// Several outputs stored on different devices, used in 'parallel_nn' case,
/// Several outputs stored on different devices, used in 'parallel_nn' case,
/// and record them by deviceId_.
/// and record them by deviceId_.
/// Also used in 'use_mkldnn' case.
std
::
vector
<
Argument
>
outputOtherDevice_
;
std
::
vector
<
Argument
>
outputOtherDevice_
;
/// If there are several outputs, map them by each name.
/// If there are several outputs, map them by each name.
std
::
map
<
std
::
string
,
Argument
*>
outputMap_
;
std
::
map
<
std
::
string
,
Argument
*>
outputMap_
;
...
@@ -172,6 +178,13 @@ protected:
...
@@ -172,6 +178,13 @@ protected:
return
inputLayer
.
getOutput
(
deviceId_
);
return
inputLayer
.
getOutput
(
deviceId_
);
}
}
/**
 * Get the output Argument of the input layer at `inputIndex` as stored for
 * an explicitly given device, instead of this layer's own deviceId_.
 *
 * @param inputIndex index into inputLayers_
 * @param deviceId   device id forwarded to the input layer's getOutput()
 * @return whatever getOutput(deviceId) yields for that input layer
 */
const Argument& getInput(size_t inputIndex, int deviceId) const {
  const LayerPtr& source = inputLayers_[inputIndex];
  return source->getOutput(deviceId);
}
/**
/**
* Get the forward-input value.
* Get the forward-input value.
*/
*/
...
@@ -186,6 +199,13 @@ protected:
...
@@ -186,6 +199,13 @@ protected:
return
inputLayer
.
getOutput
(
deviceId_
).
value
;
return
inputLayer
.
getOutput
(
deviceId_
).
value
;
}
}
/**
 * Get the forward value matrix of the input layer at `inputIndex`, taken
 * from that layer's output for the given deviceId.
 *
 * @param inputIndex index into inputLayers_
 * @param deviceId   device id forwarded to the input layer's getOutput()
 * @return the `value` member of the selected output Argument
 */
const MatrixPtr& getInputValue(int inputIndex, int deviceId) {
  const LayerPtr& source = inputLayers_[inputIndex];
  return source->getOutput(deviceId).value;
}
/**
/**
* Get the forward-input grad.
* Get the forward-input grad.
*/
*/
...
@@ -200,6 +220,13 @@ protected:
...
@@ -200,6 +220,13 @@ protected:
return
inputLayer
.
getOutput
(
deviceId_
).
grad
;
return
inputLayer
.
getOutput
(
deviceId_
).
grad
;
}
}
/**
 * Get the gradient matrix of the input layer at `inputIndex`, taken from
 * that layer's output for the given deviceId.
 *
 * @param inputIndex index into inputLayers_
 * @param deviceId   device id forwarded to the input layer's getOutput()
 * @return the `grad` member of the selected output Argument
 */
const MatrixPtr& getInputGrad(int inputIndex, int deviceId) {
  const LayerPtr& source = inputLayers_[inputIndex];
  return source->getOutput(deviceId).grad;
}
/**
/**
* Get the forward-input label.
* Get the forward-input label.
*/
*/
...
...
paddle/gserver/layers/MKLDNNFcLayer.cpp
浏览文件 @
322d9ad8
...
@@ -61,43 +61,42 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() {
...
@@ -61,43 +61,42 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() {
return
;
return
;
}
}
// TODO(TJ): dst format should get from wgtVal_
CHECK
(
wgtVal_
)
<<
"should have been initialized"
;
int
dstFmt
=
PARAM_FORMAT_MKLDNN_OI
;
bool
hasNoSpatial_
=
ih_
==
1
&&
iw_
==
1
;
int
srcFmt
=
weight_
->
getParameterPtr
()
->
getHeaderFormat
();
auto
targetDim
=
wgtVal_
->
getDims
();
if
(
srcFmt
==
dstFmt
)
{
auto
srcFmt
=
hasNoSpatial_
?
memory
::
format
::
io
:
memory
::
format
::
ihwo
;
return
;
wgtVal_
->
reorderDataFrom
(
wgtVal_
,
srcFmt
,
targetDim
);
}
// The weight_ is transposed from initial paddle weight
MatrixPtr
paddleWgt
=
Matrix
::
create
(
weight_
->
getW
()
->
getData
(),
iLayerSize_
,
oc_
,
false
,
false
);
// TODO(TJ): remove this print when do not need differ weights
std
::
ostringstream
ostr
;
paddleWgt
->
print
(
ostr
);
VLOG
(
MKLDNN_ALL
)
<<
"Initial Weight from paddle: "
<<
std
::
endl
<<
ostr
.
str
();
// The mkldnn weight is transposed from initial paddle matrix
MatrixPtr
paddleWgtT
;
paddleWgt
->
transpose
(
paddleWgtT
,
true
);
weight_
->
getW
()
->
copyFrom
(
*
paddleWgtT
);
weight_
->
getParameterPtr
()
->
setHeaderFormat
(
dstFmt
);
hasInitedWgt_
=
true
;
hasInitedWgt_
=
true
;
}
}
void
MKLDNNFcLayer
::
convertWeightsToPaddle
()
{
void
MKLDNNFcLayer
::
convertWeightsToPaddle
()
{
MatrixPtr
dnnWgt
=
weight_
->
getW
();
CHECK
(
wgtVal_
)
<<
"should have been initialized"
;
MatrixPtr
paddleWgt
;
bool
hasNoSpatial_
=
ih_
==
1
&&
iw_
==
1
;
dnnWgt
->
transpose
(
paddleWgt
,
true
);
auto
targetDim
=
wgtVal_
->
getDims
();
auto
dstFmt
=
hasNoSpatial_
?
memory
::
format
::
io
:
memory
::
format
::
ihwo
;
// copy paddle weight and override on weight_
wgtVal_
->
reorderDataTo
(
wgtVal_
,
dstFmt
,
targetDim
);
MatrixPtr
dnnWgtT
=
Matrix
::
create
(
}
dnnWgt
->
getData
(),
dnnWgt
->
getWidth
(),
dnnWgt
->
getHeight
(),
false
,
false
);
dnnWgtT
->
copyFrom
(
*
paddleWgt
);
/**
 * Publish this layer's output to the entries of outputOtherDevice_.
 *
 * First copies the output meta information through
 * copyOutputInfoToOtherDevice(), then lets every entry whose deviceId is
 * CPU_DEVICE share output_.value directly (no conversion is done for fc).
 * A warning is logged when more than one CPU entry is found.
 */
void MKLDNNFcLayer::convertOutputToOtherDevice() {
  copyOutputInfoToOtherDevice();

  int cpuEntries = 0;
  for (auto& other : outputOtherDevice_) {
    if (other.deviceId != CPU_DEVICE) {
      continue;
    }
    // the fc output needs no conversion for CPU: just share the pointer
    other.value = output_.value;
    ++cpuEntries;
  }

  if (cpuEntries > 1) {
    LOG(WARNING) << "should not have more than one CPU devie";
  }
}
}
void
MKLDNNFcLayer
::
reshape
()
{
void
MKLDNNFcLayer
::
reshape
()
{
const
Argument
&
input
=
getInput
(
0
);
const
Argument
&
input
=
getInput
(
0
,
getPrev
(
0
)
->
getDeviceId
()
);
int
batchSize
=
input
.
getBatchSize
();
int
batchSize
=
input
.
getBatchSize
();
if
(
bs_
==
batchSize
)
{
if
(
bs_
==
batchSize
)
{
return
;
return
;
...
@@ -111,10 +110,6 @@ void MKLDNNFcLayer::reshape() {
...
@@ -111,10 +110,6 @@ void MKLDNNFcLayer::reshape() {
if
(
iw_
==
0
)
{
if
(
iw_
==
0
)
{
iw_
=
1
;
iw_
=
1
;
}
}
hasSpatial_
=
true
;
if
(
ih_
==
1
&&
iw_
==
1
)
{
hasSpatial_
=
false
;
}
CHECK_EQ
(
iLayerSize_
,
inputLayers_
[
0
]
->
getSize
());
CHECK_EQ
(
iLayerSize_
,
inputLayers_
[
0
]
->
getSize
());
ic_
=
iLayerSize_
/
(
ih_
*
iw_
);
ic_
=
iLayerSize_
/
(
ih_
*
iw_
);
CHECK_EQ
(
size_t
(
ic_
*
ih_
*
iw_
),
iLayerSize_
)
<<
"not divisible"
;
CHECK_EQ
(
size_t
(
ic_
*
ih_
*
iw_
),
iLayerSize_
)
<<
"not divisible"
;
...
@@ -135,37 +130,53 @@ void MKLDNNFcLayer::reshape() {
...
@@ -135,37 +130,53 @@ void MKLDNNFcLayer::reshape() {
void
MKLDNNFcLayer
::
resetFwd
()
{
void
MKLDNNFcLayer
::
resetFwd
()
{
bool
hasBias
=
biases_
&&
biases_
->
getW
();
bool
hasBias
=
biases_
&&
biases_
->
getW
();
real
*
iData
=
getInputValue
(
0
)
->
getData
();
const
MatrixPtr
&
wgt
=
weight_
->
getW
();
real
*
oData
=
getOutputValue
()
->
getData
();
const
MatrixPtr
&
bias
=
hasBias
?
biases_
->
getW
()
:
nullptr
;
real
*
wData
=
weight_
->
getW
()
->
getData
();
const
MatrixPtr
&
out
=
output_
.
value
;
real
*
bData
=
hasBias
?
biases_
->
getW
()
->
getData
()
:
NULL
;
if
(
inputIsOnlyMKLDNN
())
{
// TODO(TJ): below create should be covered in MkldnnMatrix
const
MatrixPtr
&
in
=
getInputValue
(
0
);
// create memory desc
inVal_
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
in
);
memory
::
desc
iMD
=
hasSpatial_
?
createMD
({
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
)
CHECK
(
inVal_
)
<<
"Input should be MKLDNNMatrix"
;
:
createMD
({
bs_
,
ic_
},
format
::
nc
);
}
else
{
memory
::
desc
wMD
=
hasSpatial_
?
createMD
({
oc_
,
ic_
,
ih_
,
iw_
},
format
::
oihw
)
CHECK_EQ
(
getPrev
(
0
)
->
getDeviceId
(),
CPU_DEVICE
)
<<
"Only support CPU yet"
;
:
createMD
({
oc_
,
ic_
},
format
::
oi
);
const
MatrixPtr
&
in
=
getInputValue
(
0
,
CPU_DEVICE
);
memory
::
desc
bMD
=
bData
!=
NULL
?
createMD
({
oc_
},
format
::
x
)
inVal_
=
MKLDNNMatrix
::
create
(
:
createMD
({},
format
::
format_undef
);
in
,
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
memory
::
desc
oMD
=
createMD
({
bs_
,
oc_
},
format
::
nc
);
}
inVal_
->
downSpatial
();
// create memory primitive desc and memory self
wgtVal_
=
MKLDNNMatrix
::
create
(
inVal_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
iMD
,
engine_
),
iData
));
wgt
,
memory
::
dims
{
oc_
,
ic_
,
ih_
,
iw_
},
format
::
oihw
,
engine_
);
wgtVal_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
wMD
,
engine_
),
wData
));
wgtVal_
->
downSpatial
();
outVal_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
oMD
,
engine_
),
oData
));
biasVal_
=
hasBias
?
MKLDNNMatrix
::
create
(
bias
,
{
oc_
},
format
::
x
,
engine_
)
:
nullptr
;
outVal_
=
MKLDNNMatrix
::
create
(
out
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
// change original output value to mkldnn output value
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
outVal_
);
if
(
!
outputIsOnlyMKLDNN
())
{
convertOutputToOtherDevice
();
}
// create forward handle
prop_kind
pk
=
prop_kind
::
forward
;
prop_kind
pk
=
prop_kind
::
forward
;
fc_fwd
::
desc
fwdDesc
=
bData
!=
NULL
?
fc_fwd
::
desc
(
pk
,
iMD
,
wMD
,
bMD
,
oMD
)
fc_fwd
::
desc
fwdDesc
=
hasBias
?
fc_fwd
::
desc
(
pk
,
:
fc_fwd
::
desc
(
pk
,
iMD
,
wMD
,
oMD
);
inVal_
->
getMemoryDesc
(),
wgtVal_
->
getMemoryDesc
(),
biasVal_
->
getMemoryDesc
(),
outVal_
->
getMemoryDesc
())
:
fc_fwd
::
desc
(
pk
,
inVal_
->
getMemoryDesc
(),
wgtVal_
->
getMemoryDesc
(),
outVal_
->
getMemoryDesc
());
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
if
(
hasBias
)
{
if
(
bData
!=
NULL
)
{
biasVal_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
bMD
,
engine_
),
bData
));
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
inVal_
,
*
wgtVal_
,
*
biasVal_
,
*
outVal_
));
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
inVal_
,
*
wgtVal_
,
*
biasVal_
,
*
outVal_
));
}
else
{
}
else
{
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
inVal_
,
*
wgtVal_
,
*
outVal_
));
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
inVal_
,
*
wgtVal_
,
*
outVal_
));
}
}
printValueFormatFlow
();
pipelineFwd_
.
clear
();
pipelineFwd_
.
clear
();
pipelineFwd_
.
push_back
(
*
fwd_
);
pipelineFwd_
.
push_back
(
*
fwd_
);
}
}
...
@@ -175,45 +186,46 @@ void MKLDNNFcLayer::resetBwd() {
...
@@ -175,45 +186,46 @@ void MKLDNNFcLayer::resetBwd() {
return
;
return
;
}
}
needResetBwd_
=
false
;
needResetBwd_
=
false
;
bool
hasBias
=
biases_
&&
biases_
->
getWGrad
();
bool
hasBias
=
biases_
&&
biases_
->
getWGrad
();
real
*
iData
=
getInputValue
(
0
)
->
getData
();
real
*
iDiff
=
getInputGrad
(
0
)
!=
nullptr
?
getInputGrad
(
0
)
->
getData
()
:
NULL
;
real
*
oDiff
=
getOutputGrad
()
->
getData
();
real
*
wDiff
=
weight_
->
getWGrad
()
->
getData
();
real
*
bDiff
=
hasBias
?
biases_
->
getWGrad
()
->
getData
()
:
NULL
;
/// backward weight
/// backward weight
// create memory desc for backward memory
CHECK
(
inVal_
)
<<
"Should have input value"
;
memory
::
desc
iMD
=
hasSpatial_
?
createMD
({
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
)
const
MatrixPtr
&
wgt
=
weight_
->
getWGrad
();
:
createMD
({
bs_
,
ic_
},
format
::
nc
);
const
MatrixPtr
&
bias
=
hasBias
?
biases_
->
getWGrad
()
:
nullptr
;
memory
::
desc
wMD
=
hasSpatial_
?
createMD
({
oc_
,
ic_
,
ih_
,
iw_
},
format
::
oihw
)
:
createMD
({
oc_
,
ic_
},
format
::
oi
);
// TODO(TJ): merge outgrad
memory
::
desc
oMD
=
createMD
({
bs_
,
oc_
},
format
::
nc
);
int
device
=
outputIsOnlyMKLDNN
()
?
MKLDNN_DEVICE
:
CPU_DEVICE
;
memory
::
desc
bMD
=
bDiff
!=
NULL
?
createMD
({
oc_
},
format
::
x
)
// for MKLDNN device:
:
createMD
({},
format
::
format_undef
);
// can not directly cast outputgrad to mkldnnmatrix,
// since each layer can not write the inputgrad to mkldnn inputgrad.
if
(
inVal_
)
{
// So just create from matrix with outputvalue format.
// update data
// for CPU device:
inVal_
->
set_data_handle
(
iData
);
// fc do not need to convert from cpu device since output is always nc format
}
else
{
// only need create from cpu device
inVal_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
iMD
,
engine_
),
iData
));
const
MatrixPtr
&
out
=
getOutput
(
device
).
grad
;
}
outGrad_
=
MKLDNNMatrix
::
create
(
out
,
outVal_
->
getPrimitiveDesc
());
wgtGrad_
=
MKLDNNMatrix
::
create
(
wgt
,
wgtVal_
->
getPrimitiveDesc
());
// create memory primitive desc and memory self
biasGrad_
=
hasBias
?
MKLDNNMatrix
::
create
(
bias
,
biasVal_
->
getPrimitiveDesc
())
wgtGrad_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
wMD
,
engine_
),
wDiff
));
:
nullptr
;
outGrad_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
oMD
,
engine_
),
oDiff
));
// create memory primitive desc
fc_fwd
::
desc
fwdDesc
=
fc_fwd
::
desc
(
prop_kind
::
forward
,
iMD
,
wMD
,
oMD
);
fc_fwd
::
desc
fwdDesc
=
fc_fwd
::
desc
(
prop_kind
::
forward
,
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
());
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
fc_bwdWgt
::
desc
bwdWgtDesc
=
bDiff
!=
NULL
fc_bwdWgt
::
desc
bwdWgtDesc
=
hasBias
?
fc_bwdWgt
::
desc
(
iMD
,
wMD
,
bMD
,
oMD
)
?
fc_bwdWgt
::
desc
(
inVal_
->
getMemoryDesc
(),
:
fc_bwdWgt
::
desc
(
iMD
,
wMD
,
oMD
);
wgtGrad_
->
getMemoryDesc
(),
biasGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
())
:
fc_bwdWgt
::
desc
(
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
());
fc_bwdWgt
::
primitive_desc
bwdWgtPD
=
fc_bwdWgt
::
primitive_desc
bwdWgtPD
=
fc_bwdWgt
::
primitive_desc
(
bwdWgtDesc
,
engine_
,
fwdPD
);
fc_bwdWgt
::
primitive_desc
(
bwdWgtDesc
,
engine_
,
fwdPD
);
if
(
bDiff
!=
NULL
)
{
if
(
hasBias
)
{
biasGrad_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
bMD
,
engine_
),
bDiff
));
bwdWgt_
.
reset
(
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
bwdWgtPD
,
*
inVal_
,
*
outGrad_
,
*
wgtGrad_
,
*
biasGrad_
));
new
fc_bwdWgt
(
bwdWgtPD
,
*
inVal_
,
*
outGrad_
,
*
wgtGrad_
,
*
biasGrad_
));
}
else
{
}
else
{
...
@@ -223,15 +235,26 @@ void MKLDNNFcLayer::resetBwd() {
...
@@ -223,15 +235,26 @@ void MKLDNNFcLayer::resetBwd() {
pipelineBwd_
.
push_back
(
*
bwdWgt_
);
pipelineBwd_
.
push_back
(
*
bwdWgt_
);
/// backward data
/// backward data
if
(
iDiff
==
NULL
)
{
device
=
inputIsOnlyMKLDNN
()
?
MKLDNN_DEVICE
:
CPU_DEVICE
;
const
MatrixPtr
&
in
=
getInputGrad
(
0
,
device
);
if
(
in
==
nullptr
)
{
return
;
return
;
}
}
fc_bwdData
::
desc
bwdDataDesc
=
fc_bwdData
::
desc
(
iMD
,
wMD
,
oMD
);
if
(
getInput
(
0
,
device
).
getAllCount
()
>
1
)
{
// TODO(TJ): use outputMaps_ ways when merge outgrad done
}
else
{
inGrad_
=
MKLDNNMatrix
::
create
(
in
,
inVal_
->
getPrimitiveDesc
());
}
fc_bwdData
::
desc
bwdDataDesc
=
fc_bwdData
::
desc
(
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
());
fc_bwdData
::
primitive_desc
bwdDataPD
=
fc_bwdData
::
primitive_desc
bwdDataPD
=
fc_bwdData
::
primitive_desc
(
bwdDataDesc
,
engine_
,
fwdPD
);
fc_bwdData
::
primitive_desc
(
bwdDataDesc
,
engine_
,
fwdPD
);
inGrad_
.
reset
(
new
memory
(
memory
::
primitive_desc
(
iMD
,
engine_
),
iDiff
));
CHECK
(
wgtVal_
)
<<
"Should have weight memory"
;
CHECK
(
wgtVal_
)
<<
"Should have weight memory"
;
bwdData_
.
reset
(
new
fc_bwdData
(
bwdDataPD
,
*
outGrad_
,
*
wgtVal_
,
*
inGrad_
));
bwdData_
.
reset
(
new
fc_bwdData
(
bwdDataPD
,
*
outGrad_
,
*
wgtVal_
,
*
inGrad_
));
printGradFormatFlow
();
pipelineBwd_
.
push_back
(
*
bwdData_
);
pipelineBwd_
.
push_back
(
*
bwdData_
);
}
}
...
@@ -241,11 +264,7 @@ void MKLDNNFcLayer::forward(PassType passType) {
...
@@ -241,11 +264,7 @@ void MKLDNNFcLayer::forward(PassType passType) {
{
{
REGISTER_TIMER_INFO
(
"mkldnn_FwdTimer"
,
getName
().
c_str
());
REGISTER_TIMER_INFO
(
"mkldnn_FwdTimer"
,
getName
().
c_str
());
syncInputValue
();
// update input data
// since it might be changed if this is after data layer
real
*
iData
=
getInputValue
(
0
)
->
getData
();
inVal_
->
set_data_handle
(
iData
);
// just submit forward pipeline
// just submit forward pipeline
stream_
->
submit
(
pipelineFwd_
);
stream_
->
submit
(
pipelineFwd_
);
...
@@ -267,10 +286,7 @@ void MKLDNNFcLayer::backward(const UpdateCallback& callback) {
...
@@ -267,10 +286,7 @@ void MKLDNNFcLayer::backward(const UpdateCallback& callback) {
REGISTER_TIMER_INFO
(
"mkldnn_bwdTimer"
,
getName
().
c_str
());
REGISTER_TIMER_INFO
(
"mkldnn_bwdTimer"
,
getName
().
c_str
());
resetBwd
();
resetBwd
();
// update diff
syncOutputGrad
();
real
*
oDiff
=
getOutputGrad
()
->
getData
();
outGrad_
->
set_data_handle
(
oDiff
);
// just submit backward pipeline
// just submit backward pipeline
stream_
->
submit
(
pipelineBwd_
);
stream_
->
submit
(
pipelineBwd_
);
}
}
...
...
paddle/gserver/layers/MKLDNNFcLayer.h
浏览文件 @
322d9ad8
...
@@ -32,16 +32,13 @@ protected:
...
@@ -32,16 +32,13 @@ protected:
// if has already init the weight
// if has already init the weight
bool
hasInitedWgt_
;
bool
hasInitedWgt_
;
// if input layer has image size info (ih>1 && iw>1)
bool
hasSpatial_
;
// fc weight and bias
// fc weight and bias
std
::
unique_ptr
<
Weight
>
weight_
;
std
::
unique_ptr
<
Weight
>
weight_
;
std
::
unique_ptr
<
Weight
>
biases_
;
std
::
unique_ptr
<
Weight
>
biases_
;
public:
public:
explicit
MKLDNNFcLayer
(
const
LayerConfig
&
config
)
explicit
MKLDNNFcLayer
(
const
LayerConfig
&
config
)
:
MKLDNNLayer
(
config
),
hasInitedWgt_
(
false
)
,
hasSpatial_
(
true
)
{}
:
MKLDNNLayer
(
config
),
hasInitedWgt_
(
false
)
{}
~
MKLDNNFcLayer
()
{}
~
MKLDNNFcLayer
()
{}
...
@@ -75,6 +72,8 @@ protected:
...
@@ -75,6 +72,8 @@ protected:
* only would be called when needed
* only would be called when needed
*/
*/
void
resetBwd
();
void
resetBwd
();
void
convertOutputToOtherDevice
()
override
;
};
};
}
// namespace paddle
}
// namespace paddle
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
322d9ad8
...
@@ -18,9 +18,9 @@ limitations under the License. */
...
@@ -18,9 +18,9 @@ limitations under the License. */
#include "Layer.h"
#include "Layer.h"
#include "MKLDNNBase.h"
#include "MKLDNNBase.h"
#include "mkldnn.hpp"
#include "mkldnn.hpp"
#include "paddle/math/MKLDNNMatrix.h"
DECLARE_bool
(
use_mkldnn
);
DECLARE_bool
(
use_mkldnn
);
DECLARE_bool
(
use_mkldnn_wgt
);
namespace
paddle
{
namespace
paddle
{
...
@@ -52,15 +52,15 @@ protected:
...
@@ -52,15 +52,15 @@ protected:
std
::
vector
<
mkldnn
::
primitive
>
pipelineFwd_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineFwd_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineBwd_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineBwd_
;
//
TODO(TJ): change below memory as MKLDNNMatrixPtr type
//
MKLDNNMatrixPtr
std
::
shared_ptr
<
mkldnn
::
memory
>
inVal_
;
MKLDNNMatrixPtr
inVal_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
inGrad_
;
MKLDNNMatrixPtr
inGrad_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
outVal_
;
MKLDNNMatrixPtr
outVal_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
outGrad_
;
MKLDNNMatrixPtr
outGrad_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
wgtVal_
;
MKLDNNMatrixPtr
wgtVal_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
wgtGrad_
;
MKLDNNMatrixPtr
wgtGrad_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
biasVal_
;
MKLDNNMatrixPtr
biasVal_
;
std
::
shared_ptr
<
mkldnn
::
memory
>
biasGrad_
;
MKLDNNMatrixPtr
biasGrad_
;
public:
public:
explicit
MKLDNNLayer
(
const
LayerConfig
&
config
)
explicit
MKLDNNLayer
(
const
LayerConfig
&
config
)
...
@@ -83,17 +83,21 @@ public:
...
@@ -83,17 +83,21 @@ public:
virtual
bool
init
(
const
LayerMap
&
layerMap
,
virtual
bool
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
const
ParameterMap
&
parameterMap
)
{
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
<<
"Please set WITH_MKLDNN=ON "
<<
"and set use_mkldnn=True"
;
CHECK
(
!
useGpu_
)
<<
"Do not support GPU yet"
;
// set device id before Layer::init
setDevice
(
MKLDNN_DEVICE
);
// change param device to MKLDNN device
setParamsDevice
(
MKLDNN_DEVICE
,
parameterMap
);
if
(
!
Layer
::
init
(
layerMap
,
parameterMap
))
{
if
(
!
Layer
::
init
(
layerMap
,
parameterMap
))
{
return
false
;
return
false
;
}
}
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
<<
"Please set WITH_MKLDNN=ON "
<<
"and set use_mkldnn=True"
;
stream_
.
reset
(
new
MKLDNNStream
());
stream_
.
reset
(
new
MKLDNNStream
());
engine_
=
CPUEngine
::
Instance
().
getEngine
();
engine_
=
CPUEngine
::
Instance
().
getEngine
();
// TODO(TJ): deivecId
return
true
;
return
true
;
}
}
...
@@ -109,6 +113,12 @@ public:
...
@@ -109,6 +113,12 @@ public:
*/
*/
virtual
void
convertWeightsToPaddle
()
{}
virtual
void
convertWeightsToPaddle
()
{}
/**
* convert MKLDNN output to other device.
* only support CPU device yet
*/
virtual
void
convertOutputToOtherDevice
()
{}
/**
/**
* print info about sizes
* print info about sizes
*/
*/
...
@@ -118,14 +128,124 @@ public:
...
@@ -118,14 +128,124 @@ public:
<<
", oh: "
<<
oh_
<<
", ow: "
<<
ow_
;
<<
", oh: "
<<
oh_
<<
", ow: "
<<
ow_
;
}
}
// TODO(TJ): move to MkldnnMatrix
/**
// create memory desc
* Print the mkldnn memory format flow of value
inline
mkldnn
::
memory
::
desc
createMD
(
*/
mkldnn
::
memory
::
dims
dims
,
virtual
void
printValueFormatFlow
()
{
mkldnn
::
memory
::
format
fmt
,
if
(
inVal_
&&
outVal_
)
{
mkldnn
::
memory
::
data_type
type
=
mkldnn
::
memory
::
data_type
::
f32
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"value format flow --- "
<<
inVal_
->
getFormat
()
// TODO(TJ): isFmtSuppoted(fmt)
<<
" >>> "
<<
outVal_
->
getFormat
();
return
mkldnn
::
memory
::
desc
(
dims
,
type
,
fmt
);
}
}
/**
* Print the mkldnn memory format flow of grad
*/
virtual
void
printGradFormatFlow
()
{
if
(
inGrad_
&&
outGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"grad format flow --- "
<<
inGrad_
->
getFormat
()
<<
" <<< "
<<
outGrad_
->
getFormat
();
}
}
protected:
/**
 * Copy frame size and sequence information from output_ to every entry of
 * outputOtherDevice_.
 * @note Only this meta information is copied; the value/grad matrices of
 *       the entries are not touched here (the original comment gives this
 *       as the reason Layer::copyOutputToOtherDevice is not used).
 */
void copyOutputInfoToOtherDevice() {
  for (auto& other : outputOtherDevice_) {
    other.setFrameHeight(output_.getFrameHeight());
    other.setFrameWidth(output_.getFrameWidth());
    other.sequenceStartPositions = output_.sequenceStartPositions;
    other.subSequenceStartPositions = output_.subSequenceStartPositions;
    other.cpuSequenceDims = output_.cpuSequenceDims;
  }
}
/**
 * Tell whether the previous layer at `index` runs on the MKLDNN device.
 *
 * @param index which previous layer to inspect (defaults to the first one)
 * @return true if that layer's device id is MKLDNN_DEVICE; false if it is
 *         CPU_DEVICE. Any other device id fails the CHECK_EQ, since only
 *         CPU is supported besides MKLDNN.
 */
bool inputIsOnlyMKLDNN(int index = 0) {
  int prevDevice = getPrev(index)->getDeviceId();
  if (prevDevice != MKLDNN_DEVICE) {
    // do not support GPU yet
    CHECK_EQ(prevDevice, CPU_DEVICE) << "Only support CPU yet";
    return false;
  }
  return true;
}
/**
 * Tell whether this layer's output is consumed on the MKLDNN device only.
 *
 * Every entry of outputOtherDevice_ must have deviceId == CPU_DEVICE
 * (enforced with CHECK_EQ).
 *
 * @return true when outputOtherDevice_ has no entries at all
 */
bool outputIsOnlyMKLDNN() {
  for (const auto& other : outputOtherDevice_) {
    CHECK_EQ(other.deviceId, CPU_DEVICE)
        << "Only support other device is CPU yet";
  }
  return outputOtherDevice_.empty();
}
/**
* Sync input value data
*/
void
syncInputValue
()
{
if
(
inputIsOnlyMKLDNN
())
{
return
;
}
real
*
iData
=
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
();
// update input data
// since it might be changed if this is after data layer
inVal_
->
updateData
(
iData
);
}
/**
* Sync output grad data
*/
void
syncOutputGrad
()
{
if
(
outputIsOnlyMKLDNN
())
{
return
;
}
// update diff
real
*
oDiff
=
getOutput
(
CPU_DEVICE
).
grad
->
getData
();
outGrad_
->
updateData
(
oDiff
);
}
/**
* Set deviceId of this layer.
*/
void
setDevice
(
int
id
)
{
deviceId_
=
id
;
}
/**
* Set deviceId of the params used in this layer.
*/
void
setParamsDevice
(
int
id
,
const
ParameterMap
&
parameterMap
)
{
for
(
auto
&
inputConfig
:
config_
.
inputs
())
{
if
(
inputConfig
.
has_input_parameter_name
())
{
ParameterPtr
parameter
;
std
::
string
name
=
inputConfig
.
input_parameter_name
();
CHECK
(
mapGet
(
name
,
parameterMap
,
&
parameter
))
<<
"Cannot find input parameter "
<<
name
<<
" for layer "
<<
getName
();
parameter
->
setDevice
(
id
);
}
}
if
(
config_
.
has_bias_parameter_name
())
{
ParameterPtr
parameter
;
std
::
string
name
=
config_
.
bias_parameter_name
();
CHECK
(
mapGet
(
name
,
parameterMap
,
&
parameter
))
<<
"Cannot find bias parameter "
<<
name
<<
" for layer "
<<
getName
();
parameter
->
setDevice
(
id
);
}
}
}
};
};
...
...
paddle/math/Allocator.h
浏览文件 @
322d9ad8
...
@@ -48,7 +48,13 @@ public:
...
@@ -48,7 +48,13 @@ public:
*/
*/
virtual
void
*
alloc
(
size_t
size
)
{
virtual
void
*
alloc
(
size_t
size
)
{
void
*
ptr
;
void
*
ptr
;
#ifdef PADDLE_USE_MKLDNN
// refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
// memory alignment
CHECK_EQ
(
posix_memalign
(
&
ptr
,
4096ul
,
size
),
0
);
#else
CHECK_EQ
(
posix_memalign
(
&
ptr
,
32ul
,
size
),
0
);
CHECK_EQ
(
posix_memalign
(
&
ptr
,
32ul
,
size
),
0
);
#endif
CHECK
(
ptr
)
<<
"Fail to allocate CPU memory: size="
<<
size
;
CHECK
(
ptr
)
<<
"Fail to allocate CPU memory: size="
<<
size
;
return
ptr
;
return
ptr
;
}
}
...
...
paddle/math/CMakeLists.txt
浏览文件 @
322d9ad8
...
@@ -14,6 +14,17 @@
...
@@ -14,6 +14,17 @@
#
#
file
(
GLOB MATH_HEADERS . *.h
)
file
(
GLOB MATH_HEADERS . *.h
)
file
(
GLOB MATH_SOURCES . *.cpp
)
file
(
GLOB MATH_SOURCES . *.cpp
)
if
(
NOT WITH_MKLDNN
)
set
(
DNN_HEADER
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/MKLDNNMatrix.h"
)
set
(
DNN_SOURCE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/MKLDNNMatrix.cpp"
)
list
(
REMOVE_ITEM MATH_HEADERS
"
${
DNN_HEADER
}
"
)
list
(
REMOVE_ITEM MATH_SOURCES
"
${
DNN_SOURCE
}
"
)
message
(
STATUS
"Skip compiling with MKLDNNMatrix"
)
else
()
message
(
STATUS
"Compile with MKLDNNMatrix"
)
endif
()
set
(
MATH_SOURCES
set
(
MATH_SOURCES
"
${
PADDLE_SOURCE_DIR
}
/paddle/math/BaseMatrix.cu"
"
${
PADDLE_SOURCE_DIR
}
/paddle/math/BaseMatrix.cu"
"
${
PADDLE_SOURCE_DIR
}
/paddle/math/TrainingAlgorithmOp.cu"
"
${
PADDLE_SOURCE_DIR
}
/paddle/math/TrainingAlgorithmOp.cu"
...
...
paddle/math/MKLDNNMatrix.cpp
0 → 100644
浏览文件 @
322d9ad8
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNMatrix.h"
using
namespace
mkldnn
;
// NOLINT
namespace
paddle
{
/**
 * Create an MKLDNNMatrix described by the primitive desc `pd`.
 *
 * @param m  matrix providing the data pointer, height and width. When it is
 *           null, a new matrix of dims[0] x (total / dims[0]) is created
 *           first. It must be a CpuMatrix and its element count must equal
 *           the product of the dims in `pd`.
 * @param pd mkldnn memory primitive desc; must have at least one dim.
 * @return an MKLDNNMatrix constructed from m's data pointer, m's height and
 *         width, and `pd`.
 */
MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
  memory::desc md = pd.desc();
  const size_t ndims = md.data.ndims;
  int* dims = md.data.dims;
  CHECK(ndims > 0) << "Input dims should not be empty";

  // total number of elements described by pd
  size_t cnts = 1;
  for (int* d = dims; d != dims + ndims; ++d) {
    cnts *= *d;
  }

  if (!m) {
    // no backing matrix supplied: create one, using the first dim as height
    const size_t height = dims[0];
    const size_t width = cnts / dims[0];
    m = Matrix::create(height, width, false, false);
  }
  CHECK(m) << " Matrix should not be empty";

  auto cpuMatrix = std::dynamic_pointer_cast<CpuMatrix>(m);
  CHECK(cpuMatrix) << "Only support create from CPU matrix yet";
  CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match";

  return std::make_shared<MKLDNNMatrix>(
      m->getData(), m->getHeight(), m->getWidth(), pd);
}
/**
 * Convenience overload: build the memory primitive desc from dims, data
 * type, format and engine, then defer to create(MatrixPtr, primitive_desc).
 */
MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
                                     memory::dims dims,
                                     memory::format fmt,
                                     engine& eg,
                                     mkldnn::memory::data_type dtype) {
  memory::desc md(dims, dtype, fmt);
  memory::primitive_desc pd(md, eg);
  return create(m, pd);
}
/**
 * Reorder data from layout `srcFmt` into this matrix's own format.
 *
 * Nothing happens when `srcFmt` already equals getFormat(). Otherwise the
 * element counts of this matrix and `m` must match, and reorderOnce() is
 * called with this matrix's buffer as the source and m's buffer as the
 * destination.
 * NOTE(review): for a "from" operation one would expect m to be the source;
 * the visible callers pass the matrix itself as `m` (in-place), where the
 * order makes no difference -- confirm the intent for the non in-place case.
 *
 * @param m         the other matrix taking part in the reorder
 * @param srcFmt    layout the data currently has
 * @param targetDim dims used to describe both layouts
 */
void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m,
                                   memory::format srcFmt,
                                   memory::dims targetDim) {
  memory::format dstFmt = getFormat();
  if (srcFmt != dstFmt) {
    CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal";
    reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim);
  }
}
/**
 * Reorder this matrix's data from its own format into layout `dstFmt`.
 *
 * Nothing happens when getFormat() already equals `dstFmt`. Otherwise the
 * element counts of this matrix and `m` must match, and reorderOnce() is
 * called with this matrix's buffer as the source and m's buffer as the
 * destination.
 *
 * @param m         matrix whose buffer receives the reordered data
 * @param dstFmt    layout to produce
 * @param targetDim dims used to describe both layouts
 */
void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m,
                                 memory::format dstFmt,
                                 memory::dims targetDim) {
  memory::format srcFmt = getFormat();
  if (srcFmt != dstFmt) {
    CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal";
    reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim);
  }
}
void
MKLDNNMatrix
::
reorderOnce
(
void
*
srcData
,
void
*
dstData
,
memory
::
format
srcFmt
,
memory
::
format
dstFmt
,
memory
::
dims
dm
)
{
CHECK
(
srcData
);
CHECK
(
dstData
);
MatrixPtr
tmpSrc
;
if
(
dstData
==
srcData
)
{
// inplace data
size_t
sz
=
1
;
for
(
size_t
i
=
0
;
i
<
dm
.
size
();
++
i
)
{
sz
*=
dm
[
i
];
}
tmpSrc
=
Matrix
::
create
(
sz
,
1
,
false
,
false
);
tmpSrc
->
copyFrom
((
real
*
)
srcData
,
sz
);
srcData
=
tmpSrc
->
getData
();
}
auto
dtype
=
this
->
getDtype
();
auto
srcMD
=
memory
::
desc
(
dm
,
dtype
,
srcFmt
);
auto
dstMD
=
memory
::
desc
(
dm
,
dtype
,
dstFmt
);
auto
eg
=
this
->
getEngine
();
auto
src
=
memory
(
memory
::
primitive_desc
(
srcMD
,
eg
),
srcData
);
auto
dst
=
memory
(
memory
::
primitive_desc
(
dstMD
,
eg
),
dstData
);
auto
r
=
reorder
(
src
,
dst
);
stream
(
stream
::
kind
::
eager
).
submit
({
r
}).
wait
();
}
void
MKLDNNMatrix
::
downSpatial
()
{
int
fmt
=
getFormat
();
if
(
!
(
fmt
==
memory
::
format
::
nchw
||
fmt
==
memory
::
format
::
oihw
))
{
// only support nchw and oihw yet, later can support more like nhwc, ihwo
return
;
}
// TODO(TJ): change H(height) and W(width) if support nhwc or more
const
int
H
=
2
,
W
=
3
;
memory
::
dims
srcDims
=
getDims
();
if
(
srcDims
[
H
]
!=
1
||
srcDims
[
W
]
!=
1
)
{
// can not down spatial
return
;
}
memory
::
dims
dstDims
=
memory
::
dims
{
srcDims
[
0
],
srcDims
[
1
]};
memory
::
format
dstFmt
;
switch
(
fmt
)
{
case
memory
::
format
::
nchw
:
dstFmt
=
memory
::
format
::
nc
;
break
;
case
memory
::
format
::
oihw
:
dstFmt
=
memory
::
format
::
oi
;
break
;
default:
LOG
(
FATAL
)
<<
"unsupported format"
;
}
memory
::
desc
md
=
memory
::
desc
(
dstDims
,
getDtype
(),
dstFmt
);
memory
::
primitive_desc
pd
=
memory
::
primitive_desc
(
md
,
getEngine
());
mkldnn_primitive_t
result
;
mkldnn
::
error
::
wrap_c_api
(
mkldnn_primitive_create
(
&
result
,
pd
.
get
(),
nullptr
,
nullptr
),
"could not create a memory primitive"
);
reset
(
result
);
set_data_handle
(
getData
());
}
}
// namespace paddle
paddle/math/MKLDNNMatrix.h
0 → 100644
浏览文件 @
322d9ad8
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "Matrix.h"
#include "mkldnn.hpp"
#include "paddle/parameter/Parameter.h"
namespace
paddle
{
// Forward declaration so the shared-pointer alias can be named before the
// class itself is defined.
class MKLDNNMatrix;

// Shared-ownership handle to an MKLDNNMatrix.
using MKLDNNMatrixPtr = std::shared_ptr<MKLDNNMatrix>;
/**
 * @brief Matrix that is simultaneously a CpuMatrix and an mkldnn::memory.
 *
 * The constructor passes the same data pointer to both base classes, so the
 * matrix view and the MKL-DNN memory primitive refer to one buffer.
 */
class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
public:
  /**
   * @param data   buffer handed to both the CpuMatrix and the memory base.
   * @param height matrix height given to CpuMatrix.
   * @param width  matrix width given to CpuMatrix.
   * @param pd     primitive descriptor used to build the memory base.
   */
  MKLDNNMatrix(real* data,
               size_t height,
               size_t width,
               mkldnn::memory::primitive_desc pd)
      : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {}

  ~MKLDNNMatrix() {}

  /**
   * Build an MKLDNNMatrix from a matrix and a memory primitive descriptor.
   */
  static MKLDNNMatrixPtr create(MatrixPtr m,
                                mkldnn::memory::primitive_desc pd);

  /**
   * Build an MKLDNNMatrix from a matrix plus the individual pieces of a
   * memory description (dims, format, engine and data type; f32 by default).
   */
  static MKLDNNMatrixPtr create(
      MatrixPtr m,
      mkldnn::memory::dims dims,
      mkldnn::memory::format fmt,
      mkldnn::engine& eg,
      mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32);

public:
  /**
   * Reorder data that is laid out as srcFmt into the format of this matrix.
   * In-place reordering is supported.
   * @note Only the data layout is reordered; the dims and format info of
   *       this matrix are NOT changed.
   */
  void reorderDataFrom(const MKLDNNMatrixPtr& m,
                       memory::format srcFmt,
                       memory::dims targetDim);

  /**
   * Reorder the data of this matrix into the layout dstFmt.
   * In-place reordering is supported.
   * @note Only the data layout is reordered; the dims and format info of
   *       the destination are NOT changed.
   */
  void reorderDataTo(const MKLDNNMatrixPtr& m,
                     memory::format dstFmt,
                     memory::dims targetDim);

  /**
   * Reduce dimensionality: "nchw --> nc" or "oihw --> oi", applied only when
   * both h and w are 1.
   */
  void downSpatial();

  /**
   * Replace the data handle of the memory primitive.
   * Caution: the size of the new buffer is not checked here; that must be
   * covered by the caller. This method only forwards to set_data_handle().
   */
  void updateData(void* data) { set_data_handle(data); }

  /**
   * Primitive descriptor of the underlying memory primitive.
   */
  mkldnn::memory::primitive_desc getPrimitiveDesc() {
    return get_primitive_desc();
  }

  /**
   * Memory descriptor taken from the primitive descriptor.
   */
  mkldnn::memory::desc getMemoryDesc() {
    return this->get_primitive_desc().desc();
  }

  /**
   * Dimensions recorded in the memory descriptor, copied into a dims vector.
   */
  mkldnn::memory::dims getDims() {
    mkldnn::memory::desc memDesc = getMemoryDesc();
    const int rank = memDesc.data.ndims;
    const int* extents = memDesc.data.dims;
    mkldnn::memory::dims result(rank);
    for (int axis = 0; axis < rank; ++axis) {
      result[axis] = extents[axis];
    }
    return result;
  }

  /**
   * Memory format recorded in the memory descriptor.
   */
  mkldnn::memory::format getFormat() {
    return static_cast<mkldnn::memory::format>(getMemoryDesc().data.format);
  }

  /**
   * Element data type recorded in the memory descriptor.
   */
  mkldnn::memory::data_type getDtype() {
    return static_cast<mkldnn::memory::data_type>(
        getMemoryDesc().data.data_type);
  }

  /**
   * Engine the primitive descriptor is bound to.
   */
  mkldnn::engine getEngine() { return this->get_primitive_desc().get_engine(); }

protected:
  /**
   * Perform one reorder from srcData (srcFmt) to dstData (dstFmt), both
   * described by dm. Supports srcData and dstData being the same buffer.
   */
  void reorderOnce(void* srcData,
                   void* dstData,
                   memory::format srcFmt,
                   memory::format dstFmt,
                   memory::dims dm);
};
}
// namespace paddle
paddle/parameter/Parameter.h
浏览文件 @
322d9ad8
...
@@ -281,7 +281,11 @@ public:
...
@@ -281,7 +281,11 @@ public:
/**
/**
* @brief Set the format in header.
* @brief Set the format in header.
*/
*/
void
setHeaderFormat
(
int32_t
fmt
)
{
headerFormat_
=
fmt
;
}
void
setHeaderFormat
(
int32_t
fmt
)
{
CHECK
(
isHeaderFormatSupported
(
fmt
))
<<
"Unsupported format version: "
<<
fmt
;
headerFormat_
=
fmt
;
}
/**
/**
* @brief Parameter Update Hook.
* @brief Parameter Update Hook.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录