Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
c1914543
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c1914543
编写于
10月 19, 2017
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine mkldnn logic, move reset buffers into MKLDNNLayer
上级
60b84856
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
358 addition
and
506 deletion
+358
-506
paddle/gserver/layers/MKLDNNConvLayer.cpp
paddle/gserver/layers/MKLDNNConvLayer.cpp
+33
-200
paddle/gserver/layers/MKLDNNConvLayer.h
paddle/gserver/layers/MKLDNNConvLayer.h
+0
-66
paddle/gserver/layers/MKLDNNFcLayer.cpp
paddle/gserver/layers/MKLDNNFcLayer.cpp
+24
-77
paddle/gserver/layers/MKLDNNFcLayer.h
paddle/gserver/layers/MKLDNNFcLayer.h
+0
-8
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+274
-50
paddle/gserver/layers/MKLDNNPoolLayer.cpp
paddle/gserver/layers/MKLDNNPoolLayer.cpp
+13
-90
paddle/gserver/layers/MKLDNNPoolLayer.h
paddle/gserver/layers/MKLDNNPoolLayer.h
+0
-13
paddle/math/MKLDNNMatrix.cpp
paddle/math/MKLDNNMatrix.cpp
+1
-1
paddle/math/MKLDNNMatrix.h
paddle/math/MKLDNNMatrix.h
+13
-1
未找到文件。
paddle/gserver/layers/MKLDNNConvLayer.cpp
浏览文件 @
c1914543
...
...
@@ -116,8 +116,6 @@ void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdBuffers
(
fwdPD_
,
in
,
wgt
,
bias
,
out
);
resetFwdPipeline
(
pipeline
,
fwdPD_
,
in
,
wgt
,
bias
,
out
);
printValueFormatFlow
();
}
void
MKLDNNConvLayer
::
resetBwd
(
std
::
vector
<
primitive
>&
pipeline
,
...
...
@@ -135,12 +133,6 @@ void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdBuffers
(
bwdWgtPD
,
bwdDataPD
,
in
,
wgt
,
bias
,
out
);
resetBwdPipeline
(
pipeline
,
bwdWgtPD
,
bwdDataPD
,
in
,
wgt
,
bias
,
out
);
printGradFormatFlow
();
}
void
MKLDNNConvLayer
::
updateInputData
()
{
cpuInVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
void
MKLDNNConvLayer
::
updateWeights
(
const
UpdateCallback
&
callback
)
{
...
...
@@ -211,11 +203,18 @@ void MKLDNNConvLayer::resetFwdBuffers(
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
pd
);
resetInValue
(
pd
,
in
);
resetInValue
(
in
,
std
::
make_shared
<
memory
::
primitive_desc
>
(
pd
->
src_primitive_desc
()));
resetOutValue
(
out
,
pd
->
dst_primitive_desc
());
resetW
gtBiasValue
(
pd
,
wgt
,
bias
);
resetW
ithMatrix
(
wgt
,
weight_
->
getW
(),
pd
->
weights_primitive_desc
()
);
resetOutValue
(
pd
,
out
);
bias
=
nullptr
;
if
(
biases_
==
nullptr
||
biases_
->
getW
()
==
nullptr
)
{
return
;
}
resetWithMatrix
(
bias
,
biases_
->
getW
(),
pd
->
bias_primitive_desc
());
}
void
MKLDNNConvLayer
::
resetFwdPipeline
(
...
...
@@ -225,104 +224,12 @@ void MKLDNNConvLayer::resetFwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
if
(
cvtInVal_
)
{
pipeline
.
push_back
(
*
cvtInVal_
);
}
if
(
bias
)
{
fwd_
.
reset
(
new
conv_fwd
(
*
pd
,
*
in
,
*
wgt
,
*
bias
,
*
out
));
}
else
{
fwd_
.
reset
(
new
conv_fwd
(
*
pd
,
*
in
,
*
wgt
,
*
out
));
}
pipeline
.
push_back
(
*
fwd_
);
if
(
cvtOutVal_
)
{
pipeline
.
push_back
(
*
cvtOutVal_
);
}
}
void
MKLDNNConvLayer
::
resetInValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
)
{
const
MatrixPtr
&
inMat
=
inputLayers_
[
0
]
->
getOutputValue
();
in
=
MKLDNNMatrix
::
create
(
inMat
,
pd
->
src_primitive_desc
());
// create buffer and reorder if input value do not match
cpuInVal_
=
nullptr
;
cvtInVal_
=
nullptr
;
MKLDNNMatrixPtr
dnnIn
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
inMat
);
CHECK_EQ
(
inputIsOnlyMKLDNN
(),
dnnIn
!=
nullptr
);
if
(
dnnIn
!=
nullptr
&&
dnnIn
->
getPrimitiveDesc
()
==
in
->
getPrimitiveDesc
())
{
in
=
dnnIn
;
return
;
}
if
(
dnnIn
)
{
if
(
dnnIn
->
getFormat
()
==
format
::
nc
)
{
CHECK
(
ih_
==
1
&&
iw_
==
1
)
<<
"when input is nc format"
;
// create a new one with nchw format and same data
memory
::
dims
inDims
=
memory
::
dims
{
bs_
,
ic_
,
1
,
1
};
dnnIn
=
MKLDNNMatrix
::
create
(
inMat
,
inDims
,
format
::
nchw
,
engine_
);
}
if
(
dnnIn
->
getPrimitiveDesc
()
==
in
->
getPrimitiveDesc
())
{
in
=
dnnIn
;
return
;
}
cpuInVal_
=
dnnIn
;
in
=
MKLDNNMatrix
::
create
(
nullptr
,
pd
->
src_primitive_desc
());
cvtInVal_
=
MKLDNNMatrix
::
createReorder
(
cpuInVal_
,
in
);
CHECK
(
cvtInVal_
)
<<
"should not be emptry"
;
}
else
{
memory
::
dims
inDims
=
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
};
cpuInVal_
=
MKLDNNMatrix
::
create
(
inMat
,
inDims
,
format
::
nchw
,
engine_
);
if
(
cpuInVal_
->
getPrimitiveDesc
()
!=
in
->
getPrimitiveDesc
())
{
// create new mkldnn matrix
in
=
MKLDNNMatrix
::
create
(
nullptr
,
pd
->
src_primitive_desc
());
cvtInVal_
=
MKLDNNMatrix
::
createReorder
(
cpuInVal_
,
in
);
CHECK
(
cvtInVal_
)
<<
"should not be emptry"
;
}
else
{
in
=
cpuInVal_
;
}
}
}
void
MKLDNNConvLayer
::
resetWgtBiasValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getW
(),
pd
->
weights_primitive_desc
());
VLOG
(
MKLDNN_FMTS
)
<<
"Weight value format: "
<<
wgt
->
getFormat
();
bias
=
(
biases_
&&
biases_
->
getW
())
?
MKLDNNMatrix
::
create
(
biases_
->
getW
(),
pd
->
bias_primitive_desc
())
:
nullptr
;
}
void
MKLDNNConvLayer
::
resetOutValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
out
)
{
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
pd
->
dst_primitive_desc
());
// create reorder if output value has cpu device and pd do not match
cpuOutVal_
=
nullptr
;
cvtOutVal_
=
nullptr
;
if
(
!
outputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
value
;
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
cpuOutVal_
=
MKLDNNMatrix
::
create
(
cpuOut
,
outDims
,
format
::
nchw
,
engine_
);
if
(
cpuOutVal_
->
getPrimitiveDesc
()
!=
pd
->
dst_primitive_desc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
pd
->
dst_primitive_desc
());
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
cpuOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be empty"
;
}
else
{
cpuOut
->
setData
(
output_
.
value
->
getData
());
cpuOutVal_
=
out
;
}
// when output is cpu device, change the mkldnn output value and make them
// share the same data. Then if next layer use inputlayer->getOuputValue()
// to achieve the input value, it will get the right data.
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
cpuOutVal_
);
return
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
}
void
MKLDNNConvLayer
::
resetBwdWgtPD
(
...
...
@@ -331,8 +238,8 @@ void MKLDNNConvLayer::resetBwdWgtPD(
loadConvSettings
(
wgtDims
,
biasDims
,
strides
,
dilations
,
padL
,
padR
);
// create backward weight using input, output and weight value memory desc
CHECK
(
inVal_
)
<<
"Should have input value"
;
CHECK
(
outVal_
)
<<
"Should have output value"
;
CHECK
(
inVal_
)
<<
"Should have in
ternal in
put value"
;
CHECK
(
outVal_
)
<<
"Should have
internal
output value"
;
CHECK
(
wgtVal_
)
<<
"Should have weight value"
;
algorithm
algo
=
algorithm
::
convolution_direct
;
padding_kind
padKind
=
padding_kind
::
zero
;
...
...
@@ -372,8 +279,8 @@ void MKLDNNConvLayer::resetBwdDataPD(
memory
::
dims
wgtDims
,
biasDims
,
strides
,
dilations
,
padL
,
padR
;
loadConvSettings
(
wgtDims
,
biasDims
,
strides
,
dilations
,
padL
,
padR
);
CHECK
(
inVal_
)
<<
"Should have input value"
;
CHECK
(
outVal_
)
<<
"Should have output value"
;
CHECK
(
inVal_
)
<<
"Should have in
ternal in
put value"
;
CHECK
(
outVal_
)
<<
"Should have
internal
output value"
;
// create backward data using input and output value memory desc
// but using weight memory desc with any format
auto
bwdDataDesc
=
conv_bwdData
::
desc
(
algorithm
::
convolution_direct
,
...
...
@@ -399,12 +306,27 @@ void MKLDNNConvLayer::resetBwdBuffers(
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
wgtPD
);
resetOutGrad
(
wgtPD
,
out
);
resetOutGrad
(
out
,
wgtPD
->
diff_dst_primitive_desc
()
);
resetWgtBiasGrad
(
wgtPD
,
wgt
,
bias
);
resetWithMatrix
(
wgt
,
weight_
->
getWGrad
(),
wgtPD
->
diff_weights_primitive_desc
());
CHECK
(
wgtVal_
!=
nullptr
&&
wgt
->
getPrimitiveDesc
()
==
wgtVal_
->
getPrimitiveDesc
())
<<
"primitive desc of weight grad and value should be equal"
;
resetInGrad
(
dataPD
,
in
);
bias
=
nullptr
;
if
(
biases_
&&
biases_
->
getWGrad
())
{
resetWithMatrix
(
bias
,
biases_
->
getWGrad
(),
wgtPD
->
diff_bias_primitive_desc
());
CHECK
(
bias
&&
biasVal_
&&
bias
->
getPrimitiveDesc
()
==
biasVal_
->
getPrimitiveDesc
())
<<
"primitive desc of bias grad should equal the bias value"
;
}
if
(
dataPD
==
nullptr
)
{
return
;
}
resetInGrad
(
in
,
dataPD
->
diff_src_primitive_desc
());
resetWgtValBwdData
(
dataPD
,
wgtValBwdData_
);
}
...
...
@@ -416,10 +338,7 @@ void MKLDNNConvLayer::resetBwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
if
(
cvtOutGrad_
)
{
pipeline
.
push_back
(
*
cvtOutGrad_
);
}
CHECK
(
inVal_
);
// add bwdWgt handle
if
(
bias
)
{
bwdWgt_
.
reset
(
new
conv_bwdWgt
(
*
wgtPD
,
*
inVal_
,
*
out
,
*
wgt
,
*
bias
));
...
...
@@ -431,99 +350,13 @@ void MKLDNNConvLayer::resetBwdPipeline(
if
(
dataPD
==
nullptr
)
{
return
;
}
if
(
cvtWgtVal_
)
{
pipeline
.
push_back
(
*
cvtWgtVal_
);
}
// add bwdData handle
CHECK
(
wgtValBwdData_
)
<<
"Should have weight memory"
;
bwdData_
.
reset
(
new
conv_bwdData
(
*
dataPD
,
*
out
,
*
wgtValBwdData_
,
*
in
));
pipeline
.
push_back
(
*
bwdData_
);
if
(
cvtInGrad_
)
{
pipeline
.
push_back
(
*
cvtInGrad_
);
}
}
void
MKLDNNConvLayer
::
resetOutGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
out
)
{
cpuOutGrad_
=
nullptr
;
cvtOutGrad_
=
nullptr
;
CHECK
(
outVal_
!=
nullptr
&&
outVal_
->
getPrimitiveDesc
()
==
wgtPD
->
diff_dst_primitive_desc
())
<<
"primitive desc of out grad and value should be equal"
;
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
// always share the same grad data of CPU output
// then the activation can get the right grad from output_.grad
output_
.
grad
->
setData
(
cpuOut
->
getData
());
// same PrimitiveDesc with cpuInVal_
CHECK
(
cpuOutVal_
);
cpuOutGrad_
=
MKLDNNMatrix
::
create
(
cpuOut
,
cpuOutVal_
->
getPrimitiveDesc
());
// create reorder if primitive desc does not match
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
!=
outVal_
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
outVal_
->
getPrimitiveDesc
());
cvtOutGrad_
=
MKLDNNMatrix
::
createReorder
(
cpuOutGrad_
,
out
);
CHECK
(
cvtOutGrad_
);
}
else
{
out
=
cpuOutGrad_
;
}
}
}
void
MKLDNNConvLayer
::
resetWgtBiasGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getWGrad
(),
wgtPD
->
diff_weights_primitive_desc
());
CHECK
(
nullptr
!=
wgtVal_
&&
wgt
->
getPrimitiveDesc
()
==
wgtVal_
->
getPrimitiveDesc
())
<<
"primitive desc of weight grad and value should be equal"
;
VLOG
(
MKLDNN_FMTS
)
<<
"weight grad format: "
<<
wgt
->
getFormat
();
bias
=
nullptr
;
if
(
biasVal_
==
nullptr
)
{
return
;
}
bias
=
MKLDNNMatrix
::
create
(
biases_
->
getWGrad
(),
wgtPD
->
diff_bias_primitive_desc
());
CHECK
(
bias
->
getPrimitiveDesc
()
==
biasVal_
->
getPrimitiveDesc
())
<<
"primitive desc of bias grad should equal the bias value"
;
}
void
MKLDNNConvLayer
::
resetInGrad
(
std
::
shared_ptr
<
conv_bwdData
::
primitive_desc
>&
dataPD
,
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
cpuInGrad_
=
nullptr
;
cvtInGrad_
=
nullptr
;
if
(
dataPD
==
nullptr
)
{
return
;
}
if
(
inputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetInGrad
(
in
,
dataPD
->
diff_src_primitive_desc
());
CHECK
(
nullptr
!=
inVal_
&&
in
->
getPrimitiveDesc
()
==
inVal_
->
getPrimitiveDesc
())
<<
"primitive desc of input grad and value should be equal"
;
}
else
{
const
MatrixPtr
&
cpuIn
=
getInputGrad
(
0
,
CPU_DEVICE
);
// same PrimitiveDesc with cpuInVal_
CHECK
(
cpuInVal_
);
cpuInGrad_
=
MKLDNNMatrix
::
create
(
cpuIn
,
cpuInVal_
->
getPrimitiveDesc
());
in
=
cpuInGrad_
;
// create reorder if PrimitiveDesc does not match
if
(
cpuInGrad_
->
getPrimitiveDesc
()
!=
dataPD
->
diff_src_primitive_desc
())
{
in
=
MKLDNNMatrix
::
create
(
getInputGrad
(
0
,
MKLDNN_DEVICE
),
dataPD
->
diff_src_primitive_desc
());
cvtInGrad_
=
MKLDNNMatrix
::
createReorder
(
in
,
cpuInGrad_
);
CHECK
(
cvtInGrad_
);
}
}
}
void
MKLDNNConvLayer
::
resetWgtValBwdData
(
...
...
paddle/gserver/layers/MKLDNNConvLayer.h
浏览文件 @
c1914543
...
...
@@ -48,17 +48,6 @@ protected:
// save forward primitive_desc, which can be used backward
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>
fwdPD_
;
// MKLDNNMatrixPtr which should be created from CPU Device
MKLDNNMatrixPtr
cpuInVal_
;
MKLDNNMatrixPtr
cpuInGrad_
;
MKLDNNMatrixPtr
cpuOutVal_
;
MKLDNNMatrixPtr
cpuOutGrad_
;
// convert handle between CPU device and MKLDNN device
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInGrad_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutGrad_
;
// whether the weight has been init
bool
hasInitedWgt_
;
...
...
@@ -94,8 +83,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
updateInputData
()
override
;
void
updateWeights
(
const
UpdateCallback
&
callback
)
override
;
void
convertWeightsFromPaddle
()
override
;
...
...
@@ -109,26 +96,6 @@ public:
<<
", sw: "
<<
sw_
<<
", dh: "
<<
dh_
<<
", dw: "
<<
dw_
;
}
void
printValueFormatFlow
()
override
{
if
(
cpuInVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
cpuInVal_
->
getFormat
()
<<
" >>>"
;
}
MKLDNNLayer
::
printValueFormatFlow
();
if
(
cpuOutVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
" >>> "
<<
cpuOutVal_
->
getFormat
();
}
}
void
printGradFormatFlow
()
override
{
if
(
cpuInGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
cpuInGrad_
->
getFormat
()
<<
" <<<"
;
}
MKLDNNLayer
::
printGradFormatFlow
();
if
(
cpuOutGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
" <<< "
<<
cpuOutGrad_
->
getFormat
();
}
}
protected:
/**
* load the dims settings of this conv
...
...
@@ -162,23 +129,6 @@ protected:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
/**
* reset MKLDNNMatrix of input value
*/
void
resetInValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
);
/**
* reset MKLDNNMatrix of weight and bias value
*/
void
resetWgtBiasValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
/**
* reset MKLDNNMatrix of output value
*/
void
resetOutValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
out
);
/**
* reset the backward weight primitive descriptor.
*/
...
...
@@ -207,22 +157,6 @@ protected:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
/**
* reset MKLDNNMatrix of output grad
*/
void
resetOutGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
out
);
/**
* reset MKLDNNMatrix of weight and bias grad
*/
void
resetWgtBiasGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
/**
* reset MKLDNNMatrix of input grad
*/
void
resetInGrad
(
std
::
shared_ptr
<
conv_bwdData
::
primitive_desc
>&
dataPD
,
MKLDNNMatrixPtr
&
in
);
/**
* reset MKLDNNMatrix of weight value for backward data
* since the primitive_desc would be different with wgtVal_
...
...
paddle/gserver/layers/MKLDNNFcLayer.cpp
浏览文件 @
c1914543
...
...
@@ -62,7 +62,7 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() {
CHECK
(
wgtVal_
)
<<
"should have been initialized"
;
bool
hasNoSpatial_
=
ih_
==
1
&&
iw_
==
1
;
auto
targetDim
=
wgtVal_
->
getDims
();
auto
srcFmt
=
hasNoSpatial_
?
memory
::
format
::
io
:
memory
::
format
::
ihwo
;
auto
srcFmt
=
hasNoSpatial_
?
format
::
io
:
format
::
ihwo
;
wgtVal_
->
reorderDataFrom
(
wgtVal_
,
srcFmt
,
targetDim
);
hasInitedWgt_
=
true
;
}
...
...
@@ -71,7 +71,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
CHECK
(
wgtVal_
)
<<
"should have been initialized"
;
bool
hasNoSpatial_
=
ih_
==
1
&&
iw_
==
1
;
auto
targetDim
=
wgtVal_
->
getDims
();
auto
dstFmt
=
hasNoSpatial_
?
memory
::
format
::
io
:
memory
::
format
::
ihwo
;
auto
dstFmt
=
hasNoSpatial_
?
format
::
io
:
format
::
ihwo
;
wgtVal_
->
reorderDataTo
(
wgtVal_
,
dstFmt
,
targetDim
);
}
...
...
@@ -100,8 +100,6 @@ void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdPD
(
fwdPD_
,
in
,
wgt
,
bias
,
out
);
resetFwdPipeline
(
pipeline
,
fwdPD_
,
in
,
wgt
,
bias
,
out
);
printValueFormatFlow
();
}
void
MKLDNNFcLayer
::
resetBwd
(
std
::
vector
<
primitive
>&
pipeline
,
...
...
@@ -119,12 +117,6 @@ void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdDataPD
(
bwdDataPD
,
in
,
out
);
resetBwdPipeline
(
pipeline
,
bwdWgtPD
,
bwdDataPD
,
in
,
wgt
,
bias
,
out
);
printGradFormatFlow
();
}
void
MKLDNNFcLayer
::
updateInputData
()
{
inVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
void
MKLDNNFcLayer
::
updateWeights
(
const
UpdateCallback
&
callback
)
{
...
...
@@ -139,51 +131,33 @@ void MKLDNNFcLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
resetInValue
(
in
);
CHECK
(
in
);
in
->
downSpatial
();
resetWgtBiasValue
(
wgt
,
bias
);
resetOutValue
(
out
);
}
// if (extInVal_) {
// extInVal_->downSpatial();
// }
void
MKLDNNFcLayer
::
resetInValue
(
MKLDNNMatrixPtr
&
in
)
{
if
(
inputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
dnnIn
=
getInputValue
(
0
);
in
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
dnnIn
);
CHECK
(
in
)
<<
"Input should be MKLDNNMatrix"
;
}
else
{
CHECK_EQ
(
getPrev
(
0
)
->
getDeviceId
(),
CPU_DEVICE
)
<<
"Only support CPU yet"
;
const
MatrixPtr
&
cpuIn
=
getInputValue
(
0
,
CPU_DEVICE
);
in
=
MKLDNNMatrix
::
create
(
cpuIn
,
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
}
in
->
downSpatial
();
}
auto
outPD
=
MKLDNNMatrix
::
createPrimitiveDesc
({
bs_
,
oc_
},
format
::
nc
,
engine_
);
resetOutValue
(
out
,
outPD
);
void
MKLDNNFcLayer
::
resetWgtBiasValue
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
format
wgtFmt
=
format
::
oihw
;
if
(
in
Val_
->
getFormat
()
==
format
::
nChw8c
)
{
if
(
in
->
getFormat
()
==
format
::
nChw8c
)
{
wgtFmt
=
format
::
oIhw8i
;
}
else
if
(
in
Val_
->
getFormat
()
==
format
::
nChw16c
)
{
}
else
if
(
in
->
getFormat
()
==
format
::
nChw16c
)
{
wgtFmt
=
format
::
oIhw16i
;
}
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getW
(),
{
oc_
,
ic_
,
ih_
,
iw_
},
wgtFmt
,
engine_
);
auto
wgtPD
=
MKLDNNMatrix
::
createPrimitiveDesc
({
oc_
,
ic_
,
ih_
,
iw_
},
wgtFmt
,
engine_
);
resetWithMatrix
(
wgt
,
weight_
->
getW
(),
wgtPD
);
wgt
->
downSpatial
();
VLOG
(
MKLDNN_FMTS
)
<<
"Weight value format: "
<<
wgt
->
getFormat
();
bias
=
(
biases_
&&
biases_
->
getW
())
?
MKLDNNMatrix
::
create
(
biases_
->
getW
(),
{
oc_
},
format
::
x
,
engine_
)
:
nullptr
;
}
void
MKLDNNFcLayer
::
resetOutValue
(
MKLDNNMatrixPtr
&
out
)
{
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
if
(
!
outputIsOnlyMKLDNN
())
{
// fc cpu output value do not need create convert, just share data
getOutput
(
CPU_DEVICE
).
value
->
setData
(
out
->
getData
());
if
(
biases_
==
nullptr
||
biases_
->
getW
()
==
nullptr
)
{
return
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
auto
biasPD
=
MKLDNNMatrix
::
createPrimitiveDesc
({
oc_
},
format
::
x
,
engine_
);
resetWithMatrix
(
bias
,
biases_
->
getW
(),
biasPD
);
}
void
MKLDNNFcLayer
::
resetFwdPD
(
std
::
shared_ptr
<
fc_fwd
::
primitive_desc
>&
pd
,
...
...
@@ -219,7 +193,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
}
else
{
fwd_
.
reset
(
new
fc_fwd
(
*
pd
,
*
in
,
*
wgt
,
*
out
));
}
pipeline
.
push_back
(
*
fwd_
);
}
...
...
@@ -227,44 +200,18 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
resetOutGrad
(
out
);
resetWgtBiasGrad
(
wgt
,
bias
);
resetInGrad
(
in
);
}
void
MKLDNNFcLayer
::
resetOutGrad
(
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
outVal_
);
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
output_
.
grad
->
setData
(
cpuOut
->
getData
());
out
=
MKLDNNMatrix
::
create
(
cpuOut
,
outVal_
->
getPrimitiveDesc
());
}
}
CHECK
(
inVal_
&&
outVal_
);
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
void
MKLDNNFcLayer
::
resetWgtBiasGrad
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
CHECK
(
wgtVal_
);
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getWGrad
(),
wgtVal_
->
getPrimitiveDesc
());
resetWithMatrix
(
wgt
,
weight_
->
getWGrad
(),
wgtVal_
->
getPrimitiveDesc
());
bias
=
nullptr
;
if
(
biasVal_
==
nullptr
)
{
return
;
}
bias
=
MKLDNNMatrix
::
create
(
biases_
->
getWGrad
(),
biasVal_
->
getPrimitiveDesc
());
}
void
MKLDNNFcLayer
::
resetInGrad
(
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
if
(
inputLayers_
[
0
]
->
getOutput
().
grad
==
nullptr
)
{
return
;
}
CHECK
(
inVal_
);
MKLDNNLayer
::
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
resetWithMatrix
(
bias
,
biases_
->
getWGrad
(),
biasVal_
->
getPrimitiveDesc
());
}
void
MKLDNNFcLayer
::
resetBwdWgtPD
(
...
...
paddle/gserver/layers/MKLDNNFcLayer.h
浏览文件 @
c1914543
...
...
@@ -66,8 +66,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
updateInputData
()
override
;
void
updateWeights
(
const
UpdateCallback
&
callback
)
override
;
void
convertWeightsFromPaddle
()
override
;
...
...
@@ -84,9 +82,6 @@ protected:
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
void
resetInValue
(
MKLDNNMatrixPtr
&
in
);
void
resetWgtBiasValue
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
void
resetOutValue
(
MKLDNNMatrixPtr
&
out
);
void
resetFwdPD
(
std
::
shared_ptr
<
fc_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
in
,
MKLDNNMatrixPtr
wgt
,
...
...
@@ -109,9 +104,6 @@ protected:
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
);
void
resetWgtBiasGrad
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
);
void
resetBwdWgtPD
(
std
::
shared_ptr
<
fc_bwdWgt
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
...
...
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
c1914543
...
...
@@ -58,11 +58,30 @@ protected:
std
::
vector
<
mkldnn
::
primitive
>
pipelineFwd_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineBwd_
;
// MKLDNNMatrixPtr with internal format
/// value and grad are seperate as internal and external buffers.
/// each MKLDNNLayer must init or reset internal buffer at least,
/// and the external buffer format is always nchw of nc(when h==w==1),
/// which is the same format as paddle.
/// When mixed with cpu device, the output_.value and output_.grad
/// always save the external data.
/// When all layers are all mkldnn layers, they could be internal data.
/// below MKLDNNMatrix buffers are all internal buffers
MKLDNNMatrixPtr
inVal_
;
MKLDNNMatrixPtr
inGrad_
;
MKLDNNMatrixPtr
outVal_
;
MKLDNNMatrixPtr
outGrad_
;
// below are external value and grad
MKLDNNMatrixPtr
extInVal_
;
MKLDNNMatrixPtr
extInGrad_
;
MKLDNNMatrixPtr
extOutVal_
;
MKLDNNMatrixPtr
extOutGrad_
;
// convert handle between external and internal buffers
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInGrad_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutGrad_
;
// weight and bias are always internal buffers
MKLDNNMatrixPtr
wgtVal_
;
MKLDNNMatrixPtr
wgtGrad_
;
MKLDNNMatrixPtr
biasVal_
;
...
...
@@ -91,6 +110,7 @@ public:
oh_
(
0
),
ow_
(
0
),
needResetBwd_
(
true
),
outputOnlyMKLDNN_
(
false
),
engine_
(
mkldnn
::
engine
::
cpu
,
0
),
stream_
(
nullptr
),
fwd_
(
nullptr
),
...
...
@@ -128,20 +148,39 @@ public:
REGISTER_TIMER_INFO
(
"mkldnn_FwdTimer"
,
getName
().
c_str
());
CHECK
(
!
inputLayers_
.
empty
());
copySeqInfoToOutputs
();
size_t
elemenCnt
=
inputLayers_
[
0
]
->
getOutput
().
value
->
getElementCnt
();
size_t
elemenCnt
=
inputLayers_
[
0
]
->
getOutput
Value
()
->
getElementCnt
();
if
(
inputElemenCnt_
!=
elemenCnt
)
{
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn forward"
;
// reset when input total sizes changed, not only the batchsize
inputElemenCnt_
=
elemenCnt
;
pipelineFwd_
.
clear
();
reshape
(
bs_
,
ic_
,
ih_
,
iw_
,
oc_
,
oh_
,
ow_
);
// all cpu device output grad or value share output's
shareCPUDevice
();
resetFwd
(
pipelineFwd_
,
inVal_
,
wgtVal_
,
biasVal_
,
outVal_
);
// MKLDNNLayer output value should be MKLDNNMatrix
// so external output value is necessary.
// then external input value is not necessary,
// since input may be mkldnn internal buffer.
CHECK
(
extOutVal_
)
<<
"external output value is necessary"
;
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
extOutVal_
);
CHECK
(
inVal_
&&
outVal_
)
<<
"internal memories are necessary"
;
if
(
cvtInVal_
)
{
pipelineFwd_
.
insert
(
pipelineFwd_
.
begin
(),
*
cvtInVal_
);
}
if
(
cvtOutVal_
)
{
pipelineFwd_
.
push_back
(
*
cvtOutVal_
);
}
convertWeightsFromPaddle
();
printValueFormat
();
needResetBwd_
=
true
;
}
if
(
inputLayers_
[
0
]
->
getType
()
==
"data"
)
{
updateInputData
();
// Update input value data when input layer is "data" type,
// since the input value data address might be changed.
CHECK
(
extInVal_
);
extInVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
if
(
!
outputOnlyMKLDNN_
)
{
...
...
@@ -149,8 +188,7 @@ public:
}
stream_
->
submit
(
pipelineFwd_
);
}
/* activation */
{
{
REGISTER_TIMER_INFO
(
"FwActTimer"
,
getName
().
c_str
());
forwardActivation
();
}
...
...
@@ -163,6 +201,16 @@ public:
pipelineMergeGrad_
.
clear
();
mergeGrad_
=
nullptr
;
resetBwd
(
pipelineBwd_
,
inGrad_
,
wgtGrad_
,
biasGrad_
,
outGrad_
);
// external output grad is not necessary
// since output may be mkldnn internal buffer or merge them directly.
CHECK
(
outGrad_
)
<<
"internal output grad is necessary"
;
if
(
cvtOutGrad_
)
{
pipelineBwd_
.
insert
(
pipelineBwd_
.
begin
(),
*
cvtOutGrad_
);
}
if
(
cvtInGrad_
)
{
pipelineBwd_
.
push_back
(
*
cvtInGrad_
);
}
printGradFormat
();
needResetBwd_
=
false
;
}
...
...
@@ -179,7 +227,6 @@ public:
REGISTER_TIMER_INFO
(
"mkldnn_bwdTimer"
,
getName
().
c_str
());
stream_
->
submit
(
pipelineBwd_
);
}
{
REGISTER_TIMER_INFO
(
"WeightUpdate"
,
getName
().
c_str
());
updateWeights
(
callback
);
...
...
@@ -195,7 +242,7 @@ public:
int
&
bs
,
int
&
ic
,
int
&
ih
,
int
&
iw
,
int
oc
,
int
&
oh
,
int
&
ow
)
=
0
;
/**
* reset the mkldnn forward primitve and memor
y
* reset the mkldnn forward primitve and memor
ies
* only would be called when input size changes
*/
virtual
void
resetFwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
...
...
@@ -205,7 +252,7 @@ public:
MKLDNNMatrixPtr
&
out
)
=
0
;
/**
* reset the mkldnn backward primitve and memor
y for mkldnn fc
* reset the mkldnn backward primitve and memor
ies
* only would be called when needed
*/
virtual
void
resetBwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
...
...
@@ -214,12 +261,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
=
0
;
/**
* Update input value data when input layer is "data" type.
* Since the input value data address might be changed.
*/
virtual
void
updateInputData
()
{}
/**
* Update weights and biases if necessary.
*/
...
...
@@ -272,21 +313,167 @@ protected:
}
/**
* reset the output grad matrix from primitive desc.
* and reset the merge grad primitive if needed.
* note: when this layer has serval outputs,
* reset MKLDNNMatrix from Matrix and internal primitive desc.
* reset nullptr if matrix or primitive desc is empty
*/
void
resetWithMatrix
(
MKLDNNMatrixPtr
&
dnn
,
const
MatrixPtr
&
mat
,
mkldnn
::
memory
::
primitive_desc
pd
)
{
dnn
=
nullptr
;
if
(
mat
==
nullptr
)
{
return
;
}
dnn
=
MKLDNNMatrix
::
create
(
mat
,
pd
);
}
/**
* reset input value from input MKLDNNMatrix and internal primitive desc.
* reset both internal and external buffer and create reorder if necessary.
*/
void
resetInValue
(
MKLDNNMatrixPtr
&
in
,
const
std
::
shared_ptr
<
mkldnn
::
memory
::
primitive_desc
>&
intPD
=
nullptr
)
{
cvtInVal_
=
nullptr
;
extInVal_
=
nullptr
;
in
=
nullptr
;
CHECK_GT
(
bs_
*
ic_
*
ih_
*
iw_
,
0
);
auto
extPD
=
MKLDNNMatrix
::
createPrimitiveDesc
(
{
bs_
,
ic_
,
ih_
,
iw_
},
mkldnn
::
memory
::
format
::
nchw
,
engine_
);
const
MatrixPtr
&
inMat
=
inputLayers_
[
0
]
->
getOutputValue
();
in
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
inMat
);
CHECK_EQ
(
inputIsOnlyMKLDNN
(),
in
!=
nullptr
);
if
(
in
==
nullptr
||
in
->
getFormat
()
==
mkldnn
::
memory
::
format
::
nc
)
{
in
=
MKLDNNMatrix
::
create
(
inMat
,
extPD
);
}
extInVal_
=
isPaddleFormat
(
in
->
getFormat
())
?
in
:
nullptr
;
if
(
in
->
getFormat
()
==
mkldnn
::
memory
::
format
::
nc
)
{
CHECK
(
ih_
==
1
&&
iw_
==
1
);
}
if
(
nullptr
==
intPD
||
in
->
getPrimitiveDesc
()
==
*
intPD
)
{
return
;
}
// need create reorder
in
=
MKLDNNMatrix
::
create
(
nullptr
,
*
intPD
);
extInVal_
=
extInVal_
?
extInVal_
:
MKLDNNMatrix
::
create
(
inMat
,
extPD
);
cvtInVal_
=
MKLDNNMatrix
::
createReorder
(
extInVal_
,
in
);
CHECK
(
cvtInVal_
)
<<
"should not be emptry"
;
}
/**
* reset output value from internal primitive desc.
* reset both internal and external buffer and create reorder if necessary.
*/
void
resetOutValue
(
MKLDNNMatrixPtr
&
out
,
mkldnn
::
memory
::
primitive_desc
intPD
)
{
cvtOutVal_
=
nullptr
;
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
intPD
);
extOutVal_
=
out
;
if
(
outputIsOnlyMKLDNN
()
||
isPaddleFormat
(
extOutVal_
->
getFormat
()))
{
return
;
}
// need create reorder
CHECK_GT
(
bs_
*
oc_
*
oh_
*
ow_
,
0
);
extOutVal_
=
MKLDNNMatrix
::
create
(
output_
.
value
,
{
bs_
,
oc_
,
oh_
,
ow_
},
mkldnn
::
memory
::
format
::
nchw
,
engine_
);
out
=
MKLDNNMatrix
::
create
(
nullptr
,
intPD
);
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
extOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be empty"
;
}
/**
 * Reset the input grad from the internal primitive desc.
 * Both the internal and the external buffer are reset, and a reorder
 * between them is created when the internal format is not nchw/nc.
 */
void resetInGrad(MKLDNNMatrixPtr& in, mkldnn::memory::primitive_desc intPD) {
  cvtInGrad_ = nullptr;
  extInGrad_ = nullptr;
  in = nullptr;

  LayerPtr& prevLayer = inputLayers_[0];
  if (prevLayer->getOutputGrad() == nullptr) {
    // the input layer has no grad, nothing to set up
    return;
  }

  CHECK(inputIsOnlyMKLDNN() || prevLayer->getOutputMapSize() <= 1)
      << "only support input is MKLDNN layer or only have one output layer";

  // When the input is a mkldnn branch node, this layer saves its input grad
  // to an internal buffer (a null matrix is passed to create()), and the
  // mkldnn input layer merges them into the actual prev->output_.grad.
  MatrixPtr gradMat = nullptr;
  if (prevLayer->getOutputMapSize() <= 1) {
    gradMat = prevLayer->getOutputGrad();
  }
  in = MKLDNNMatrix::create(gradMat, intPD);

  // store the grad on the input layer's output Argument that is looked up
  // by this layer's name
  Argument& prevOut = prevLayer->getOutput(this->getName());
  prevOut.grad = std::dynamic_pointer_cast<Matrix>(in);
  CHECK(inVal_ != nullptr && inVal_->getPrimitiveDesc() == intPD)
      << "should have internal input value and primitive desc must equal";
  if (inputIsOnlyMKLDNN()) {
    return;
  }

  extInGrad_ = in;
  if (!isPaddleFormat(extInGrad_->getFormat())) {
    // a reorder is needed: the external grad takes the primitive desc of the
    // external input value, the internal grad is re-created from intPD
    CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat()))
        << "should have external input value and the format must be nchw(nc)";
    extInGrad_ = MKLDNNMatrix::create(gradMat, extInVal_->getPrimitiveDesc());
    CHECK(inVal_ != nullptr && inVal_->getPrimitiveDesc() == intPD)
        << "should have internal input value and primitive desc must equal";
    in = MKLDNNMatrix::create(nullptr, intPD);
    cvtInGrad_ = MKLDNNMatrix::createReorder(in, extInGrad_);
    CHECK(cvtInGrad_);
  }
}
/**
* reset output grad from internal primitive desc.
* merge grad if necessary.
* reset both internal and external buffer and create reorder if necessary.
* note: about merge grad, when this layer has several outputs,
* it could not be mixed with cpu device,
* since it can not get memory desc from cpu device.
*/
virtual
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
,
mkldnn
::
memory
::
primitive_desc
pd
)
{
CHECK
(
outputIsOnlyMKLDNN
())
<<
"do not support mixed with other device yet"
;
void resetOutGrad(MKLDNNMatrixPtr& out, mkldnn::memory::primitive_desc intPD) {
  cvtOutGrad_ = nullptr;
  extOutGrad_ = nullptr;
  out = nullptr;

  // wrap output_.grad with the internal primitive desc, then let
  // resetMergeGrad() set up the merge primitive (it receives `out` by
  // reference)
  MatrixPtr& gradMat = output_.grad;
  out = MKLDNNMatrix::create(gradMat, intPD);
  resetMergeGrad(out);
  if (outputIsOnlyMKLDNN()) {
    return;
  }

  CHECK_LE(outputMap_.size(), 1U) << "do not support mixed with cpu device";
  extOutGrad_ = out;
  if (!isPaddleFormat(extOutGrad_->getFormat())) {
    // a reorder is needed: the external grad takes the primitive desc of the
    // external output value, the internal grad is re-created from intPD,
    // and cvtOutGrad_ reorders external -> internal
    CHECK(extOutVal_ != nullptr && isPaddleFormat(extOutVal_->getFormat()))
        << "should have external output value and the format must be nchw(nc)";
    extOutGrad_ = MKLDNNMatrix::create(gradMat, extOutVal_->getPrimitiveDesc());
    CHECK(outVal_ != nullptr && outVal_->getPrimitiveDesc() == intPD)
        << "should have internal output value and primitive desc must equal";
    out = MKLDNNMatrix::create(nullptr, intPD);
    cvtOutGrad_ = MKLDNNMatrix::createReorder(extOutGrad_, out);
    CHECK(cvtOutGrad_);
  }
}
/**
* reset the merge grad primitive if necessary.
* note: do not support the grads are mixed with cpu device,
* since it can not get memory desc from cpu device.
*/
virtual
void
resetMergeGrad
(
MKLDNNMatrixPtr
&
out
)
{
mergeGrad_
=
nullptr
;
pipelineMergeGrad_
.
clear
();
out
=
MKLDNNMatrix
::
create
(
output_
.
grad
,
pd
);
if
(
outputMap_
.
size
()
<=
1
)
{
if
(
outputMap_
.
size
()
<=
1
||
!
outputIsOnlyMKLDNN
())
{
// do not merge when output is not all MKLDNN or only one output
return
;
}
CHECK
(
out
)
<<
"should have reset internal ouput grad"
;
std
::
vector
<
double
>
scales
(
outputMap_
.
size
(),
1.0
);
std
::
vector
<
mkldnn
::
memory
::
primitive_desc
>
srcPDs
;
std
::
vector
<
mkldnn
::
primitive
::
at
>
srcs
;
...
...
@@ -309,15 +496,13 @@ protected:
for
(
size_t
i
=
1
;
i
<
srcPDs
.
size
();
++
i
)
{
CHECK
(
srcPDs
[
0
]
==
srcPDs
[
i
]);
}
tmpOutGrad_
=
nullptr
;
tmpOutGrad_
=
out
;
tmpCvt_
=
nullptr
;
if
(
out
->
getPrimitiveDesc
()
!=
srcPDs
[
0
])
{
tmpOutGrad_
=
MKLDNNMatrix
::
create
(
nullptr
,
srcPDs
[
0
]);
tmpCvt_
=
MKLDNNMatrix
::
createReorder
(
tmpOutGrad_
,
out
);
CHECK
(
tmpCvt_
);
pipelineMergeGrad_
.
push_back
(
*
tmpCvt_
);
}
else
{
tmpOutGrad_
=
out
;
}
auto
sumPD
=
mkldnn
::
sum
::
primitive_desc
(
...
...
@@ -326,21 +511,6 @@ protected:
pipelineMergeGrad_
.
insert
(
pipelineMergeGrad_
.
begin
(),
*
mergeGrad_
);
}
/**
 * Reset the input grad from a primitive desc.
 * This function is available when the input is only mkldnn,
 * or when the input does not care about the cpu device.
 */
virtual void resetInGrad(MKLDNNMatrixPtr& in,
                         mkldnn::memory::primitive_desc pd) {
  LayerPtr& prevLayer = inputLayers_[0];

  // the input layer's own grad is wrapped only when its output map has at
  // most one entry; otherwise a null matrix is handed to create()
  MatrixPtr gradMat = nullptr;
  if (prevLayer->getOutputMapSize() <= 1) {
    gradMat = prevLayer->getOutput().grad;
  }
  in = MKLDNNMatrix::create(gradMat, pd);

  // store the grad on the input layer's output Argument that is looked up
  // by this layer's name
  Argument& prevOut = prevLayer->getOutput(this->getName());
  prevOut.grad = std::dynamic_pointer_cast<Matrix>(in);
}
/**
* print info about sizes
*/
...
...
@@ -351,22 +521,50 @@ protected:
}
/**
*
Print the mkldnn memory format flow
of value
*
print the mkldnn memory format
of value
*/
virtual
void
printValueFormatFlow
()
{
if
(
inVal_
&&
outVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
inVal_
->
getFormat
()
<<
" >>> "
<<
outVal_
->
getFormat
();
virtual void printValueFormat() {
  // Each buffer that is currently set logs its own format at the
  // MKLDNN_FMTS verbosity level, in the order:
  // external input, internal input, internal output, external output.
  if (extInVal_ != nullptr) {
    VLOG(MKLDNN_FMTS) << extInVal_->getFormat() << " >>> ";
  }
  if (inVal_ != nullptr) {
    VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>>";
  }
  if (outVal_ != nullptr) {
    VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
  }
  if (extOutVal_ != nullptr) {
    VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
  }

  // parameters
  if (wgtVal_ != nullptr) {
    VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat();
  }
  if (biasVal_ != nullptr) {
    VLOG(MKLDNN_FMTS) << "Bias value format: " << biasVal_->getFormat();
  }
}
/**
*
Print the mkldnn memory format flow
of grad
*
print the mkldnn memory format
of grad
*/
virtual
void
printGradFormatFlow
()
{
if
(
inGrad_
&&
outGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
inGrad_
->
getFormat
()
<<
" <<< "
<<
outGrad_
->
getFormat
();
virtual void printGradFormat() {
  // Each grad buffer that is currently set logs its own format at the
  // MKLDNN_FMTS verbosity level, in the order:
  // external input, internal input, internal output, external output.
  if (extInGrad_ != nullptr) {
    VLOG(MKLDNN_FMTS) << extInGrad_->getFormat() << " <<< ";
  }
  if (inGrad_ != nullptr) {
    VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<<";
  }
  if (outGrad_ != nullptr) {
    VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< ";
  }
  if (extOutGrad_ != nullptr) {
    VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
  }

  // parameters
  if (wgtGrad_ != nullptr) {
    VLOG(MKLDNN_FMTS) << "Weight grad format: " << wgtGrad_->getFormat();
  }
  if (biasGrad_ != nullptr) {
    VLOG(MKLDNN_FMTS) << "Bias grad format: " << biasGrad_->getFormat();
  }
}
...
...
@@ -405,6 +603,19 @@ protected:
void
setDevice
(
int
id
)
{
deviceId_
=
id
;
}
private:
/**
 * Tell whether the given format is nchw or nc,
 * i.e. one that is supported by the default Paddle memory layout.
 */
bool isPaddleFormat(mkldnn::memory::format fmt) {
  return fmt == mkldnn::memory::format::nchw ||
         fmt == mkldnn::memory::format::nc;
}
/**
* clear all grad
*/
...
...
@@ -449,6 +660,19 @@ private:
}
}
/**
 * When the output is not MKLDNN-only, assign the value and grad of output_
 * to every entry of outputOtherDevice_.
 */
void shareCPUDevice() {
  if (!outputIsOnlyMKLDNN()) {
    for (size_t idx = 0; idx < outputOtherDevice_.size(); ++idx) {
      auto& other = outputOtherDevice_[idx];
      other.value = output_.value;
      other.grad = output_.grad;
    }
  }
}
/**
* Check the cpu device number of outputOtherDevice_.
* should have only one at most.
...
...
paddle/gserver/layers/MKLDNNPoolLayer.cpp
浏览文件 @
c1914543
...
...
@@ -85,8 +85,6 @@ void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdPD
(
fwdPD_
,
in
,
out
);
resetFwdPipeline
(
pipeline
,
fwdPD_
,
in
,
out
);
printValueFormatFlow
();
}
void
MKLDNNPoolLayer
::
resetBwd
(
std
::
vector
<
primitive
>&
pipeline
,
...
...
@@ -101,65 +99,22 @@ void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdPD
(
pd
,
in
,
out
);
resetBwdPipeline
(
pipeline
,
pd
,
in
,
out
);
printGradFormatFlow
();
}
void
MKLDNNPoolLayer
::
updateInputData
()
{
inVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
                                      MKLDNNMatrixPtr& out) {
  // the input value is reset before the output value
  this->resetInValue(in);
  this->resetOutValue(out);
}
void MKLDNNPoolLayer::resetInValue(MKLDNNMatrixPtr& in) {
  if (inputIsOnlyMKLDNN()) {
    // the input value is expected to already be a MKLDNNMatrix; reuse it
    in = std::dynamic_pointer_cast<MKLDNNMatrix>(getInputValue(0));
    CHECK(in) << "Input should be MKLDNNMatrix";
    return;
  }

  // otherwise the previous layer must be on the CPU device: wrap its value
  // as an nchw MKLDNNMatrix of dims {bs_, ic_, ih_, iw_}
  CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet";
  in = MKLDNNMatrix::create(getInputValue(0, CPU_DEVICE),
                            {bs_, ic_, ih_, iw_},
                            format::nchw,
                            engine_);
}
// Reset the output value of the pooling layer.
// The output matrix is created with dims {bs_, oc_, oh_, ow_} in the format
// of inVal_. When the output is not MKLDNN-only, an nchw matrix wrapping the
// CPU_DEVICE output value is created too, plus a reorder if the two
// primitive descs differ.
//
// NOTE(review): the last three statements use `in`, which is neither a
// parameter nor a local of this function, and call a two-argument
// resetOutValue. This looks like added lines of the diff fused with the
// removed body — confirm against the real file before relying on this text.
void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
  // the output format is copied from inVal_, so it has to exist already
  CHECK(inVal_) << "Should reset input value first";
  memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
  out = MKLDNNMatrix::create(
      output_.value, outDims, inVal_->getFormat(), engine_);

  // create a reorder if the output value has a cpu device and the primitive
  // descs do not match
  cpuOutVal_ = nullptr;
  cvtOutVal_ = nullptr;
  if (!outputIsOnlyMKLDNN()) {
    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
    cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
    if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
      // formats differ: re-create `out` from its own primitive desc with a
      // null matrix and reorder out -> cpuOutVal_
      out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc());
      cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
      CHECK(cvtOutVal_) << "should not be emptry";
    } else {
      // formats match: set the CPU output's data to that of output_.value
      // and use `out` itself as the CPU output value
      cpuOut->setData(output_.value->getData());
      cpuOutVal_ = out;
    }
    output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
    return;
  }
  output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
  // NOTE(review): see the header note — `in` is not declared in this function
  CHECK(in);
  auto outPD =
      MKLDNNMatrix::createPrimitiveDesc(outDims, in->getFormat(), engine_);
  resetOutValue(out, outPD);
}
void
MKLDNNPoolLayer
::
resetFwdPD
(
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
in
,
MKLDNNMatrixPtr
out
)
{
memory
::
dims
inDims
=
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
};
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
memory
::
dims
kernels
=
memory
::
dims
{
fh_
,
fw_
};
memory
::
dims
strides
=
memory
::
dims
{
sh_
,
sw_
};
memory
::
dims
padL
=
memory
::
dims
{
ph_
,
pw_
};
...
...
@@ -194,58 +149,26 @@ void MKLDNNPoolLayer::resetFwdPipeline(
?
std
::
make_shared
<
pool_fwd
>
(
pool_fwd
(
*
pd
,
*
in
,
*
out
,
*
workspace_
))
:
std
::
make_shared
<
pool_fwd
>
(
pool_fwd
(
*
pd
,
*
in
,
*
out
));
pipeline
.
push_back
(
*
fwd_
);
if
(
cvtOutVal_
)
{
pipeline
.
push_back
(
*
cvtOutVal_
);
}
}
void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                      MKLDNNMatrixPtr& out) {
  // the output grad is reset before the input grad
  this->resetOutGrad(out);
  this->resetInGrad(in);
}
void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
  cpuOutGrad_ = nullptr;
  cvtOutGrad_ = nullptr;
  CHECK(outVal_);

  if (outputIsOnlyMKLDNN()) {
    // MKLDNN-only output: the base class builds the grad from the primitive
    // desc of the output value
    MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
    return;
  }

  const MatrixPtr& cpuGrad = getOutput(CPU_DEVICE).grad;
  // always share the same grad data of the CPU output,
  // so that the activation can get the right grad from output_.grad
  output_.grad->setData(cpuGrad->getData());
  cpuOutGrad_ = MKLDNNMatrix::create(
      cpuGrad, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);

  if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
    // formats differ: create the internal grad from the output value's
    // primitive desc and reorder cpuOutGrad_ -> out
    out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
    cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
    CHECK(cvtOutGrad_) << "should not be emptry";
  } else {
    out = cpuOutGrad_;
  }
}
// Reset the input grad of the pooling layer.
// `in` is cleared first; when the first input layer has no output grad the
// function returns with `in` left null, otherwise the base-class
// resetInGrad is called with the primitive desc of inVal_.
//
// NOTE(review): the last three statements use `out`, which is neither a
// parameter nor a local of this function, and call resetOutGrad/resetInGrad
// with two arguments. This looks like added lines of the diff fused with the
// removed body — confirm against the real file before relying on this text.
void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) {
  in = nullptr;
  if (inputLayers_[0]->getOutput().grad == nullptr) {
    // the input layer has no grad
    return;
  }
  CHECK(inVal_);
  MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
  // NOTE(review): see the header note — `out` is not declared in this function
  CHECK(inVal_ && outVal_);
  resetOutGrad(out, outVal_->getPrimitiveDesc());
  resetInGrad(in, inVal_->getPrimitiveDesc());
}
void
MKLDNNPoolLayer
::
resetBwdPD
(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
pd
=
nullptr
;
if
(
in
==
nullptr
)
{
return
;
}
memory
::
dims
kernels
=
memory
::
dims
{
fh_
,
fw_
};
memory
::
dims
strides
=
memory
::
dims
{
sh_
,
sw_
};
memory
::
dims
padL
=
memory
::
dims
{
ph_
,
pw_
};
memory
::
dims
padR
=
getPaddingR
();
CHECK
(
in
);
CHECK
(
out
);
auto
bwdDesc
=
pool_bwd
::
desc
(
poolAlgo_
,
in
->
getMemoryDesc
(),
...
...
@@ -263,8 +186,8 @@ void MKLDNNPoolLayer::resetBwdPipeline(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
if
(
cvtOutGrad_
)
{
pipeline
.
push_back
(
*
cvtOutGrad_
)
;
if
(
pd
==
nullptr
)
{
return
;
}
bwdData_
=
...
...
paddle/gserver/layers/MKLDNNPoolLayer.h
浏览文件 @
c1914543
...
...
@@ -38,13 +38,6 @@ protected:
// pooling_avg or pooling_max
mkldnn
::
algorithm
poolAlgo_
;
// MKLDNNMatrixPtr which should be created from CPU Device
MKLDNNMatrixPtr
cpuOutVal_
;
MKLDNNMatrixPtr
cpuOutGrad_
;
// convert handle between CPU device and MKLDNN device
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutGrad_
;
// save forward primitive_desc, which can be used backward
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>
fwdPD_
;
// according to https://github.com/01org/mkl-dnn/blob/master/tests/gtests/
...
...
@@ -74,8 +67,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
updateInputData
()
override
;
void
printSizeInfo
()
override
{
MKLDNNLayer
::
printSizeInfo
();
VLOG
(
MKLDNN_SIZES
)
<<
getName
()
<<
": fh: "
<<
fh_
<<
", fw: "
<<
fw_
...
...
@@ -90,8 +81,6 @@ protected:
* reset pipeline.
*/
void
resetFwdBuffers
(
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
);
void
resetInValue
(
MKLDNNMatrixPtr
&
in
);
void
resetOutValue
(
MKLDNNMatrixPtr
&
out
);
void
resetFwdPD
(
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
in
,
MKLDNNMatrixPtr
out
);
...
...
@@ -106,8 +95,6 @@ protected:
* reset pipeline.
*/
void
resetBwdBuffers
(
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
);
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
);
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
);
void
resetBwdPD
(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
);
...
...
paddle/math/MKLDNNMatrix.cpp
浏览文件 @
c1914543
...
...
@@ -46,7 +46,7 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
memory
::
format
fmt
,
engine
&
eg
,
mkldnn
::
memory
::
data_type
dtype
)
{
return
create
(
m
,
memory
::
primitive_desc
(
memory
::
desc
(
dims
,
dtype
,
fmt
),
eg
));
return
create
(
m
,
createPrimitiveDesc
(
dims
,
fmt
,
eg
,
dtype
));
}
std
::
shared_ptr
<
reorder
>
MKLDNNMatrix
::
createReorder
(
const
MKLDNNMatrixPtr
&
src
,
...
...
paddle/math/MKLDNNMatrix.h
浏览文件 @
c1914543
...
...
@@ -52,12 +52,24 @@ public:
mkldnn
::
engine
&
eg
,
mkldnn
::
memory
::
data_type
dtype
=
mkldnn
::
memory
::
data_type
::
f32
);
/**
 * Create a memory primitive descriptor from dims, format and engine.
 * The data type defaults to f32.
 */
static mkldnn::memory::primitive_desc createPrimitiveDesc(
    const mkldnn::memory::dims dims,
    const mkldnn::memory::format& fmt,
    const mkldnn::engine& eg,
    const mkldnn::memory::data_type& dtype =
        mkldnn::memory::data_type::f32) {
  // build the memory desc first, then bind it to the engine
  const memory::desc memDesc(dims, dtype, fmt);
  return mkldnn::memory::primitive_desc(memDesc, eg);
}
/**
* Create Memory descriptor.
* default with any format and f32 dtype
*/
static
mkldnn
::
memory
::
desc
createMemoryDesc
(
const
mkldnn
::
memory
::
dims
&
dims
,
const
mkldnn
::
memory
::
dims
dims
,
const
mkldnn
::
memory
::
format
&
fmt
=
mkldnn
::
memory
::
format
::
any
,
const
mkldnn
::
memory
::
data_type
&
dtype
=
mkldnn
::
memory
::
data_type
::
f32
)
{
return
mkldnn
::
memory
::
desc
(
dims
,
dtype
,
fmt
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录