PaddlePaddle / Paddle
Commit c1914543, authored Oct 19, 2017 by tensor-tang
refine mkldnn logic, move reset buffers into MKLDNNLayer
Parent: 60b84856

Showing 9 changed files with 358 additions and 506 deletions (+358 −506)
paddle/gserver/layers/MKLDNNConvLayer.cpp   +33   −200
paddle/gserver/layers/MKLDNNConvLayer.h     +0    −66
paddle/gserver/layers/MKLDNNFcLayer.cpp     +24   −77
paddle/gserver/layers/MKLDNNFcLayer.h       +0    −8
paddle/gserver/layers/MKLDNNLayer.h         +274  −50
paddle/gserver/layers/MKLDNNPoolLayer.cpp   +13   −90
paddle/gserver/layers/MKLDNNPoolLayer.h     +0    −13
paddle/math/MKLDNNMatrix.cpp                +1    −1
paddle/math/MKLDNNMatrix.h                  +13   −1
paddle/gserver/layers/MKLDNNConvLayer.cpp (+33 −200)

```diff
@@ -116,8 +116,6 @@ void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
   resetFwdBuffers(fwdPD_, in, wgt, bias, out);
 
   resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
-
-  printValueFormatFlow();
 }
 
 void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -135,12 +133,6 @@ void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
   resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out);
 
   resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out);
-
-  printGradFormatFlow();
-}
-
-void MKLDNNConvLayer::updateInputData() {
-  cpuInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
 }
 
 void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) {
@@ -211,11 +203,18 @@ void MKLDNNConvLayer::resetFwdBuffers(
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
   CHECK(pd);
-  resetInValue(pd, in);
+  resetInValue(
+      in, std::make_shared<memory::primitive_desc>(pd->src_primitive_desc()));
+  resetOutValue(out, pd->dst_primitive_desc());
 
-  resetWgtBiasValue(pd, wgt, bias);
+  resetWithMatrix(wgt, weight_->getW(), pd->weights_primitive_desc());
 
-  resetOutValue(pd, out);
+  bias = nullptr;
+  if (biases_ == nullptr || biases_->getW() == nullptr) {
+    return;
+  }
+  resetWithMatrix(bias, biases_->getW(), pd->bias_primitive_desc());
 }
 
 void MKLDNNConvLayer::resetFwdPipeline(
@@ -225,104 +224,12 @@ void MKLDNNConvLayer::resetFwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  if (cvtInVal_) {
-    pipeline.push_back(*cvtInVal_);
-  }
-
   if (bias) {
     fwd_.reset(new conv_fwd(*pd, *in, *wgt, *bias, *out));
   } else {
     fwd_.reset(new conv_fwd(*pd, *in, *wgt, *out));
   }
   pipeline.push_back(*fwd_);
-
-  if (cvtOutVal_) {
-    pipeline.push_back(*cvtOutVal_);
-  }
-}
-
-void MKLDNNConvLayer::resetInValue(
-    std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) {
-  const MatrixPtr& inMat = inputLayers_[0]->getOutputValue();
-  in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc());
-
-  // create buffer and reorder if input value do not match
-  cpuInVal_ = nullptr;
-  cvtInVal_ = nullptr;
-
-  MKLDNNMatrixPtr dnnIn = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
-  CHECK_EQ(inputIsOnlyMKLDNN(), dnnIn != nullptr);
-  if (dnnIn != nullptr && dnnIn->getPrimitiveDesc() == in->getPrimitiveDesc()) {
-    in = dnnIn;
-    return;
-  }
-  if (dnnIn) {
-    if (dnnIn->getFormat() == format::nc) {
-      CHECK(ih_ == 1 && iw_ == 1) << "when input is nc format";
-      // create a new one with nchw format and same data
-      memory::dims inDims = memory::dims{bs_, ic_, 1, 1};
-      dnnIn = MKLDNNMatrix::create(inMat, inDims, format::nchw, engine_);
-    }
-    if (dnnIn->getPrimitiveDesc() == in->getPrimitiveDesc()) {
-      in = dnnIn;
-      return;
-    }
-    cpuInVal_ = dnnIn;
-    in = MKLDNNMatrix::create(nullptr, pd->src_primitive_desc());
-    cvtInVal_ = MKLDNNMatrix::createReorder(cpuInVal_, in);
-    CHECK(cvtInVal_) << "should not be emptry";
-  } else {
-    memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_};
-    cpuInVal_ = MKLDNNMatrix::create(inMat, inDims, format::nchw, engine_);
-    if (cpuInVal_->getPrimitiveDesc() != in->getPrimitiveDesc()) {
-      // create new mkldnn matrix
-      in = MKLDNNMatrix::create(nullptr, pd->src_primitive_desc());
-      cvtInVal_ = MKLDNNMatrix::createReorder(cpuInVal_, in);
-      CHECK(cvtInVal_) << "should not be emptry";
-    } else {
-      in = cpuInVal_;
-    }
-  }
-}
-
-void MKLDNNConvLayer::resetWgtBiasValue(
-    std::shared_ptr<conv_fwd::primitive_desc>& pd,
-    MKLDNNMatrixPtr& wgt,
-    MKLDNNMatrixPtr& bias) {
-  wgt = MKLDNNMatrix::create(weight_->getW(), pd->weights_primitive_desc());
-  VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat();
-
-  bias = (biases_ && biases_->getW())
-             ? MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc())
-             : nullptr;
-}
-
-void MKLDNNConvLayer::resetOutValue(
-    std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& out) {
-  out = MKLDNNMatrix::create(output_.value, pd->dst_primitive_desc());
-
-  // create reorder if output value has cpu device and pd do not match
-  cpuOutVal_ = nullptr;
-  cvtOutVal_ = nullptr;
-  if (!outputIsOnlyMKLDNN()) {
-    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
-    memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
-    cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
-    if (cpuOutVal_->getPrimitiveDesc() != pd->dst_primitive_desc()) {
-      out = MKLDNNMatrix::create(nullptr, pd->dst_primitive_desc());
-      cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
-      CHECK(cvtOutVal_) << "should not be empty";
-    } else {
-      cpuOut->setData(output_.value->getData());
-      cpuOutVal_ = out;
-    }
-    // when output is cpu device, change the mkldnn output value and make them
-    // share the same data. Then if next layer use inputlayer->getOuputValue()
-    // to achieve the input value, it will get the right data.
-    output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
-    return;
-  }
-  output_.value = std::dynamic_pointer_cast<Matrix>(out);
 }
 
 void MKLDNNConvLayer::resetBwdWgtPD(
@@ -331,8 +238,8 @@ void MKLDNNConvLayer::resetBwdWgtPD(
   loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
 
   // create backward weight using input, output and weight value memory desc
-  CHECK(inVal_) << "Should have input value";
-  CHECK(outVal_) << "Should have output value";
+  CHECK(inVal_) << "Should have internal input value";
+  CHECK(outVal_) << "Should have internal output value";
   CHECK(wgtVal_) << "Should have weight value";
   algorithm algo = algorithm::convolution_direct;
   padding_kind padKind = padding_kind::zero;
@@ -372,8 +279,8 @@ void MKLDNNConvLayer::resetBwdDataPD(
   memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
   loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
 
-  CHECK(inVal_) << "Should have input value";
-  CHECK(outVal_) << "Should have output value";
+  CHECK(inVal_) << "Should have internal input value";
+  CHECK(outVal_) << "Should have internal output value";
   // create backward data using input and output value memory desc
   // but using weight memory desc with any format
   auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct,
@@ -399,12 +306,27 @@ void MKLDNNConvLayer::resetBwdBuffers(
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
   CHECK(wgtPD);
-  resetOutGrad(wgtPD, out);
+  resetOutGrad(out, wgtPD->diff_dst_primitive_desc());
 
-  resetWgtBiasGrad(wgtPD, wgt, bias);
+  resetWithMatrix(
+      wgt, weight_->getWGrad(), wgtPD->diff_weights_primitive_desc());
+  CHECK(wgtVal_ != nullptr &&
+        wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc())
+      << "primitive desc of weight grad and value should be equal";
 
-  resetInGrad(dataPD, in);
+  bias = nullptr;
+  if (biases_ && biases_->getWGrad()) {
+    resetWithMatrix(
+        bias, biases_->getWGrad(), wgtPD->diff_bias_primitive_desc());
+    CHECK(bias && biasVal_ &&
+          bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc())
+        << "primitive desc of bias grad should equal the bias value";
+  }
 
+  if (dataPD == nullptr) {
+    return;
+  }
+  resetInGrad(in, dataPD->diff_src_primitive_desc());
   resetWgtValBwdData(dataPD, wgtValBwdData_);
 }
@@ -416,10 +338,7 @@ void MKLDNNConvLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  if (cvtOutGrad_) {
-    pipeline.push_back(*cvtOutGrad_);
-  }
-
+  CHECK(inVal_);
   // add bwdWgt handle
   if (bias) {
     bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias));
@@ -431,99 +350,13 @@ void MKLDNNConvLayer::resetBwdPipeline(
   if (dataPD == nullptr) {
     return;
   }
 
   if (cvtWgtVal_) {
     pipeline.push_back(*cvtWgtVal_);
   }
 
   // add bwdData handle
   CHECK(wgtValBwdData_) << "Should have weight memory";
   bwdData_.reset(new conv_bwdData(*dataPD, *out, *wgtValBwdData_, *in));
   pipeline.push_back(*bwdData_);
-
-  if (cvtInGrad_) {
-    pipeline.push_back(*cvtInGrad_);
-  }
-}
-
-void MKLDNNConvLayer::resetOutGrad(
-    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) {
-  cpuOutGrad_ = nullptr;
-  cvtOutGrad_ = nullptr;
-  CHECK(outVal_ != nullptr &&
-        outVal_->getPrimitiveDesc() == wgtPD->diff_dst_primitive_desc())
-      << "primitive desc of out grad and value should be equal";
-  if (outputIsOnlyMKLDNN()) {
-    MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
-  } else {
-    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
-    // always share the same grad data of CPU output
-    // then the activation can get the right grad from output_.grad
-    output_.grad->setData(cpuOut->getData());
-    // same PrimitiveDesc with cpuInVal_
-    CHECK(cpuOutVal_);
-    cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
-    // create reorder if primitive desc does not match
-    if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
-      out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
-      cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
-      CHECK(cvtOutGrad_);
-    } else {
-      out = cpuOutGrad_;
-    }
-  }
-}
-
-void MKLDNNConvLayer::resetWgtBiasGrad(
-    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
-    MKLDNNMatrixPtr& wgt,
-    MKLDNNMatrixPtr& bias) {
-  wgt = MKLDNNMatrix::create(weight_->getWGrad(),
-                             wgtPD->diff_weights_primitive_desc());
-  CHECK(nullptr != wgtVal_ &&
-        wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc())
-      << "primitive desc of weight grad and value should be equal";
-  VLOG(MKLDNN_FMTS) << "weight grad format: " << wgt->getFormat();
-
-  bias = nullptr;
-  if (biasVal_ == nullptr) {
-    return;
-  }
-  bias = MKLDNNMatrix::create(biases_->getWGrad(),
-                              wgtPD->diff_bias_primitive_desc());
-  CHECK(bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc())
-      << "primitive desc of bias grad should equal the bias value";
-}
-
-void MKLDNNConvLayer::resetInGrad(
-    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
-    MKLDNNMatrixPtr& in) {
-  in = nullptr;
-  cpuInGrad_ = nullptr;
-  cvtInGrad_ = nullptr;
-  if (dataPD == nullptr) {
-    return;
-  }
-
-  if (inputIsOnlyMKLDNN()) {
-    MKLDNNLayer::resetInGrad(in, dataPD->diff_src_primitive_desc());
-    CHECK(nullptr != inVal_ &&
-          in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
-        << "primitive desc of input grad and value should be equal";
-  } else {
-    const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE);
-    // same PrimitiveDesc with cpuInVal_
-    CHECK(cpuInVal_);
-    cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc());
-    in = cpuInGrad_;
-    // create reorder if PrimitiveDesc does not match
-    if (cpuInGrad_->getPrimitiveDesc() != dataPD->diff_src_primitive_desc()) {
-      in = MKLDNNMatrix::create(getInputGrad(0, MKLDNN_DEVICE),
-                                dataPD->diff_src_primitive_desc());
-      cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_);
-      CHECK(cvtInGrad_);
-    }
-  }
-}
 
 void MKLDNNConvLayer::resetWgtValBwdData(
```
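The recurring pattern deleted from this file — compare two memory primitive descriptors and create a reorder only when they differ — is exactly what the commit centralizes in MKLDNNLayer. The standalone sketch below restates that pattern directly against the MKL-DNN v0.x C++ API that Paddle wrapped at the time; the dimensions, formats, and buffer names are made up for illustration, not taken from the diff.

```cpp
#include <mkldnn.hpp>
#include <vector>

using namespace mkldnn;

int main() {
  // External buffer in Paddle's default nchw layout vs. an internal
  // buffer in a blocked layout a convolution primitive might prefer.
  engine eng(engine::cpu, 0);
  memory::dims dims = {2, 8, 4, 4};  // bs, ic, ih, iw (illustrative)

  auto ext_pd = memory::primitive_desc(
      memory::desc(dims, memory::data_type::f32, memory::format::nchw), eng);
  auto int_pd = memory::primitive_desc(
      memory::desc(dims, memory::data_type::f32, memory::format::nChw8c), eng);

  std::vector<float> ext_buf(2 * 8 * 4 * 4), int_buf(2 * 8 * 4 * 4);
  memory ext_mem(ext_pd, ext_buf.data());
  memory int_mem(int_pd, int_buf.data());

  // Insert the reorder only when the descriptors differ, mirroring the
  // "create reorder if primitive desc does not match" checks above.
  std::vector<primitive> pipeline;
  if (ext_pd != int_pd) {
    pipeline.push_back(reorder(ext_mem, int_mem));
  }
  stream(stream::kind::eager).submit(pipeline).wait();
  return 0;
}
```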
paddle/gserver/layers/MKLDNNConvLayer.h (+0 −66)

```diff
@@ -48,17 +48,6 @@ protected:
   // save forward primitive_desc, which can be used backward
   std::shared_ptr<conv_fwd::primitive_desc> fwdPD_;
 
-  // MKLDNNMatrixPtr which should be created from CPU Device
-  MKLDNNMatrixPtr cpuInVal_;
-  MKLDNNMatrixPtr cpuInGrad_;
-  MKLDNNMatrixPtr cpuOutVal_;
-  MKLDNNMatrixPtr cpuOutGrad_;
-  // convert handle between CPU device and MKLDNN device
-  std::shared_ptr<mkldnn::reorder> cvtInVal_;
-  std::shared_ptr<mkldnn::reorder> cvtInGrad_;
-  std::shared_ptr<mkldnn::reorder> cvtOutVal_;
-  std::shared_ptr<mkldnn::reorder> cvtOutGrad_;
-
   // whether the weight has been init
   bool hasInitedWgt_;
@@ -94,8 +83,6 @@ public:
                 MKLDNNMatrixPtr& bias,
                 MKLDNNMatrixPtr& out) override;
 
-  void updateInputData() override;
-
   void updateWeights(const UpdateCallback& callback) override;
 
   void convertWeightsFromPaddle() override;
@@ -109,26 +96,6 @@ public:
                       << ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_;
   }
 
-  void printValueFormatFlow() override {
-    if (cpuInVal_) {
-      VLOG(MKLDNN_FMTS) << cpuInVal_->getFormat() << " >>>";
-    }
-    MKLDNNLayer::printValueFormatFlow();
-    if (cpuOutVal_) {
-      VLOG(MKLDNN_FMTS) << " >>> " << cpuOutVal_->getFormat();
-    }
-  }
-
-  void printGradFormatFlow() override {
-    if (cpuInGrad_) {
-      VLOG(MKLDNN_FMTS) << cpuInGrad_->getFormat() << " <<<";
-    }
-    MKLDNNLayer::printGradFormatFlow();
-    if (cpuOutGrad_) {
-      VLOG(MKLDNN_FMTS) << " <<< " << cpuOutGrad_->getFormat();
-    }
-  }
-
 protected:
   /**
    * load the dims settings of this conv
@@ -162,23 +129,6 @@ protected:
                        MKLDNNMatrixPtr& bias,
                        MKLDNNMatrixPtr& out);
 
-  /**
-   * reset MKLDNNMatrix of input value
-   */
-  void resetInValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
-                    MKLDNNMatrixPtr& in);
-  /**
-   * reset MKLDNNMatrix of weight and bias value
-   */
-  void resetWgtBiasValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
-                         MKLDNNMatrixPtr& wgt,
-                         MKLDNNMatrixPtr& bias);
-  /**
-   * reset MKLDNNMatrix of output value
-   */
-  void resetOutValue(std::shared_ptr<conv_fwd::primitive_desc>& pd,
-                     MKLDNNMatrixPtr& out);
-
   /**
    * reset the backward weight primitive descriptor.
    */
@@ -207,22 +157,6 @@ protected:
                        MKLDNNMatrixPtr& bias,
                        MKLDNNMatrixPtr& out);
 
-  /**
-   * reset MKLDNNMatrix of output grad
-   */
-  void resetOutGrad(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
-                    MKLDNNMatrixPtr& out);
-  /**
-   * reset MKLDNNMatrix of weight and bias grad
-   */
-  void resetWgtBiasGrad(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
-                        MKLDNNMatrixPtr& wgt,
-                        MKLDNNMatrixPtr& bias);
-  /**
-   * reset MKLDNNMatrix of input grad
-   */
-  void resetInGrad(std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
-                   MKLDNNMatrixPtr& in);
-
   /**
    * reset MKLDNNMatrix of weight value for backward data
    * since the primitive_desc would be different with wgtVal_
```
paddle/gserver/layers/MKLDNNFcLayer.cpp (+24 −77)

```diff
@@ -62,7 +62,7 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() {
   CHECK(wgtVal_) << "should have been initialized";
   bool hasNoSpatial_ = ih_ == 1 && iw_ == 1;
   auto targetDim = wgtVal_->getDims();
-  auto srcFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo;
+  auto srcFmt = hasNoSpatial_ ? format::io : format::ihwo;
   wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim);
   hasInitedWgt_ = true;
 }
@@ -71,7 +71,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
   CHECK(wgtVal_) << "should have been initialized";
   bool hasNoSpatial_ = ih_ == 1 && iw_ == 1;
   auto targetDim = wgtVal_->getDims();
-  auto dstFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo;
+  auto dstFmt = hasNoSpatial_ ? format::io : format::ihwo;
   wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim);
 }
@@ -100,8 +100,6 @@ void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
   resetFwdPD(fwdPD_, in, wgt, bias, out);
 
   resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
-
-  printValueFormatFlow();
 }
 
 void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -119,12 +117,6 @@ void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
   resetBwdDataPD(bwdDataPD, in, out);
 
   resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out);
-
-  printGradFormatFlow();
-}
-
-void MKLDNNFcLayer::updateInputData() {
-  inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
 }
 
 void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) {
@@ -139,51 +131,33 @@ void MKLDNNFcLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
                                     MKLDNNMatrixPtr& bias,
                                     MKLDNNMatrixPtr& out) {
   resetInValue(in);
+  CHECK(in);
+  in->downSpatial();
+  // if (extInVal_) {
+  //   extInVal_->downSpatial();
+  // }
 
-  resetWgtBiasValue(wgt, bias);
-
-  resetOutValue(out);
-}
-
-void MKLDNNFcLayer::resetInValue(MKLDNNMatrixPtr& in) {
-  if (inputIsOnlyMKLDNN()) {
-    const MatrixPtr& dnnIn = getInputValue(0);
-    in = std::dynamic_pointer_cast<MKLDNNMatrix>(dnnIn);
-    CHECK(in) << "Input should be MKLDNNMatrix";
-  } else {
-    CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet";
-    const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE);
-    in = MKLDNNMatrix::create(cpuIn, {bs_, ic_, ih_, iw_}, format::nchw, engine_);
-  }
-  in->downSpatial();
-}
+  auto outPD = MKLDNNMatrix::createPrimitiveDesc({bs_, oc_}, format::nc, engine_);
+  resetOutValue(out, outPD);
 
-void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
-                                      MKLDNNMatrixPtr& bias) {
   format wgtFmt = format::oihw;
-  if (inVal_->getFormat() == format::nChw8c) {
+  if (in->getFormat() == format::nChw8c) {
     wgtFmt = format::oIhw8i;
-  } else if (inVal_->getFormat() == format::nChw16c) {
+  } else if (in->getFormat() == format::nChw16c) {
     wgtFmt = format::oIhw16i;
   }
-  wgt = MKLDNNMatrix::create(
-      weight_->getW(), {oc_, ic_, ih_, iw_}, wgtFmt, engine_);
+  auto wgtPD =
+      MKLDNNMatrix::createPrimitiveDesc({oc_, ic_, ih_, iw_}, wgtFmt, engine_);
+  resetWithMatrix(wgt, weight_->getW(), wgtPD);
   wgt->downSpatial();
   VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat();
 
-  bias = (biases_ && biases_->getW())
-             ? MKLDNNMatrix::create(biases_->getW(), {oc_}, format::x, engine_)
-             : nullptr;
-}
-
-void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) {
-  out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_);
-  if (!outputIsOnlyMKLDNN()) {
-    // fc cpu output value do not need create convert, just share data
-    getOutput(CPU_DEVICE).value->setData(out->getData());
+  if (biases_ == nullptr || biases_->getW() == nullptr) {
+    return;
   }
-  output_.value = std::dynamic_pointer_cast<Matrix>(out);
+  auto biasPD = MKLDNNMatrix::createPrimitiveDesc({oc_}, format::x, engine_);
+  resetWithMatrix(bias, biases_->getW(), biasPD);
 }
 
 void MKLDNNFcLayer::resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
@@ -219,7 +193,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
   } else {
     fwd_.reset(new fc_fwd(*pd, *in, *wgt, *out));
   }
-
   pipeline.push_back(*fwd_);
 }
@@ -227,44 +200,18 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                     MKLDNNMatrixPtr& wgt,
                                     MKLDNNMatrixPtr& bias,
                                     MKLDNNMatrixPtr& out) {
-  resetOutGrad(out);
-
-  resetWgtBiasGrad(wgt, bias);
-
-  resetInGrad(in);
-}
-
-void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
-  CHECK(outVal_);
-  if (outputIsOnlyMKLDNN()) {
-    MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
-  } else {
-    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
-    output_.grad->setData(cpuOut->getData());
-    out = MKLDNNMatrix::create(cpuOut, outVal_->getPrimitiveDesc());
-  }
-}
+  CHECK(inVal_ && outVal_);
+  resetOutGrad(out, outVal_->getPrimitiveDesc());
+  resetInGrad(in, inVal_->getPrimitiveDesc());
 
-void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
-                                     MKLDNNMatrixPtr& bias) {
   CHECK(wgtVal_);
-  wgt = MKLDNNMatrix::create(weight_->getWGrad(), wgtVal_->getPrimitiveDesc());
+  resetWithMatrix(wgt, weight_->getWGrad(), wgtVal_->getPrimitiveDesc());
 
   bias = nullptr;
   if (biasVal_ == nullptr) {
     return;
   }
-  bias = MKLDNNMatrix::create(biases_->getWGrad(), biasVal_->getPrimitiveDesc());
-}
-
-void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) {
-  in = nullptr;
-  if (inputLayers_[0]->getOutput().grad == nullptr) {
-    return;
-  }
-  CHECK(inVal_);
-  MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
+  resetWithMatrix(bias, biases_->getWGrad(), biasVal_->getPrimitiveDesc());
 }
 
 void MKLDNNFcLayer::resetBwdWgtPD(
```
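The weight-format choice in the reworked resetFwdBuffers is the one subtlety here: when the FC input arrives in a blocked layout (nChw8c or nChw16c), the weights must use the matching input-blocked layout (oIhw8i or oIhw16i), otherwise MKL-DNN would have to convert on the fly. A hypothetical helper restating that rule — the function name is mine, not part of the diff:

```cpp
#include <mkldnn.hpp>

using mkldnn::memory;

// Hypothetical restatement of the selection rule in resetFwdBuffers:
// blocked input layouts require correspondingly blocked weight layouts.
memory::format pickFcWeightFormat(memory::format inFmt) {
  switch (inFmt) {
    case memory::format::nChw8c:  return memory::format::oIhw8i;   // 8-wide channel blocking
    case memory::format::nChw16c: return memory::format::oIhw16i;  // 16-wide channel blocking
    default:                      return memory::format::oihw;     // plain layout otherwise
  }
}
```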
paddle/gserver/layers/MKLDNNFcLayer.h (+0 −8)

```diff
@@ -66,8 +66,6 @@ public:
                 MKLDNNMatrixPtr& bias,
                 MKLDNNMatrixPtr& out) override;
 
-  void updateInputData() override;
-
   void updateWeights(const UpdateCallback& callback) override;
 
   void convertWeightsFromPaddle() override;
@@ -84,9 +82,6 @@ protected:
                        MKLDNNMatrixPtr& wgt,
                        MKLDNNMatrixPtr& bias,
                        MKLDNNMatrixPtr& out);
-  void resetInValue(MKLDNNMatrixPtr& in);
-  void resetWgtBiasValue(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias);
-  void resetOutValue(MKLDNNMatrixPtr& out);
   void resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
                   MKLDNNMatrixPtr in,
                   MKLDNNMatrixPtr wgt,
@@ -109,9 +104,6 @@ protected:
                        MKLDNNMatrixPtr& wgt,
                        MKLDNNMatrixPtr& bias,
                        MKLDNNMatrixPtr& out);
-  void resetOutGrad(MKLDNNMatrixPtr& out);
-  void resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias);
-  void resetInGrad(MKLDNNMatrixPtr& in);
   void resetBwdWgtPD(std::shared_ptr<fc_bwdWgt::primitive_desc>& pd,
                      MKLDNNMatrixPtr& wgt,
                      MKLDNNMatrixPtr& bias,
```
paddle/gserver/layers/MKLDNNLayer.h (+274 −50)

```diff
@@ -58,11 +58,30 @@ protected:
   std::vector<mkldnn::primitive> pipelineFwd_;
   std::vector<mkldnn::primitive> pipelineBwd_;
 
-  // MKLDNNMatrixPtr with internal format
+  /// value and grad are seperate as internal and external buffers.
+  /// each MKLDNNLayer must init or reset internal buffer at least,
+  /// and the external buffer format is always nchw of nc(when h==w==1),
+  /// which is the same format as paddle.
+  /// When mixed with cpu device, the output_.value and output_.grad
+  /// always save the external data.
+  /// When all layers are all mkldnn layers, they could be internal data.
+  /// below MKLDNNMatrix buffers are all internal buffers
   MKLDNNMatrixPtr inVal_;
   MKLDNNMatrixPtr inGrad_;
   MKLDNNMatrixPtr outVal_;
   MKLDNNMatrixPtr outGrad_;
+  // below are external value and grad
+  MKLDNNMatrixPtr extInVal_;
+  MKLDNNMatrixPtr extInGrad_;
+  MKLDNNMatrixPtr extOutVal_;
+  MKLDNNMatrixPtr extOutGrad_;
+  // convert handle between external and internal buffers
+  std::shared_ptr<mkldnn::reorder> cvtInVal_;
+  std::shared_ptr<mkldnn::reorder> cvtInGrad_;
+  std::shared_ptr<mkldnn::reorder> cvtOutVal_;
+  std::shared_ptr<mkldnn::reorder> cvtOutGrad_;
+  // weight and bias are always internal buffers
   MKLDNNMatrixPtr wgtVal_;
   MKLDNNMatrixPtr wgtGrad_;
   MKLDNNMatrixPtr biasVal_;
@@ -91,6 +110,7 @@ public:
         oh_(0),
         ow_(0),
         needResetBwd_(true),
+        outputOnlyMKLDNN_(false),
         engine_(mkldnn::engine::cpu, 0),
         stream_(nullptr),
         fwd_(nullptr),
@@ -128,20 +148,39 @@ public:
     REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
     CHECK(!inputLayers_.empty());
     copySeqInfoToOutputs();
-    size_t elemenCnt = inputLayers_[0]->getOutput().value->getElementCnt();
+    size_t elemenCnt = inputLayers_[0]->getOutputValue()->getElementCnt();
     if (inputElemenCnt_ != elemenCnt) {
       VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
       // reset when input total sizes changed, not only the batchsize
       inputElemenCnt_ = elemenCnt;
       pipelineFwd_.clear();
       reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
+      // all cpu device output grad or value share output's
+      shareCPUDevice();
       resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
+      // MKLDNNLayer output value should be MKLDNNMatrix
+      // so external output value is necessary.
+      // then external input value is not necessary,
+      // since input may be mkldnn internal buffer.
+      CHECK(extOutVal_) << "external output value is necessary";
+      output_.value = std::dynamic_pointer_cast<Matrix>(extOutVal_);
+      CHECK(inVal_ && outVal_) << "internal memories are necessary";
+      if (cvtInVal_) {
+        pipelineFwd_.insert(pipelineFwd_.begin(), *cvtInVal_);
+      }
+      if (cvtOutVal_) {
+        pipelineFwd_.push_back(*cvtOutVal_);
+      }
       convertWeightsFromPaddle();
+      printValueFormat();
       needResetBwd_ = true;
     }
 
     if (inputLayers_[0]->getType() == "data") {
-      updateInputData();
+      // Update input value data when input layer is "data" type,
+      // since the input value data address might be changed.
+      CHECK(extInVal_);
+      extInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
     }
 
     if (!outputOnlyMKLDNN_) {
@@ -149,8 +188,7 @@ public:
       }
       stream_->submit(pipelineFwd_);
     }
-
-    /* activation */ {
+    {
       REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
       forwardActivation();
     }
@@ -163,6 +201,16 @@ public:
       pipelineMergeGrad_.clear();
       mergeGrad_ = nullptr;
       resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
+      // external output grad is not necessary
+      // since output may be mkldnn internal buffer or merge them directly.
+      CHECK(outGrad_) << "internal output grad is necessary";
+      if (cvtOutGrad_) {
+        pipelineBwd_.insert(pipelineBwd_.begin(), *cvtOutGrad_);
+      }
+      if (cvtInGrad_) {
+        pipelineBwd_.push_back(*cvtInGrad_);
+      }
+      printGradFormat();
       needResetBwd_ = false;
     }
@@ -179,7 +227,6 @@ public:
       REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
       stream_->submit(pipelineBwd_);
     }
-
     {
       REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
       updateWeights(callback);
@@ -195,7 +242,7 @@ public:
       int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) = 0;
 
   /**
-   * reset the mkldnn forward primitve and memory
+   * reset the mkldnn forward primitve and memories
    * only would be called when input size changes
    */
   virtual void resetFwd(std::vector<mkldnn::primitive>& pipeline,
@@ -205,7 +252,7 @@ public:
                         MKLDNNMatrixPtr& out) = 0;
 
   /**
-   * reset the mkldnn backward primitve and memory for mkldnn fc
+   * reset the mkldnn backward primitve and memories
    * only would be called when needed
    */
   virtual void resetBwd(std::vector<mkldnn::primitive>& pipeline,
@@ -214,12 +261,6 @@ public:
                         MKLDNNMatrixPtr& bias,
                         MKLDNNMatrixPtr& out) = 0;
 
-  /**
-   * Update input value data when input layer is "data" type.
-   * Since the input value data address might be changed.
-   */
-  virtual void updateInputData() {}
-
   /**
    * Update weights and biases if necessary.
    */
@@ -272,21 +313,167 @@ protected:
   }
 
   /**
-   * reset the output grad matrix from primitive desc.
-   * and reset the merge grad primitive if needed.
-   * note: when this layer has serval outputs,
+   * reset MKLDNNMatrix from Matrix and internal primitive desc.
+   * reset nullptr if matrix or primitive desc is empty
+   */
+  void resetWithMatrix(MKLDNNMatrixPtr& dnn,
+                       const MatrixPtr& mat,
+                       mkldnn::memory::primitive_desc pd) {
+    dnn = nullptr;
+    if (mat == nullptr) {
+      return;
+    }
+    dnn = MKLDNNMatrix::create(mat, pd);
+  }
+
+  /**
+   * reset input value from input MKLDNNMatrix and internal primitive desc.
+   * reset both internal and external buffer and create reorder if necessary.
+   */
+  void resetInValue(
+      MKLDNNMatrixPtr& in,
+      const std::shared_ptr<mkldnn::memory::primitive_desc>& intPD = nullptr) {
+    cvtInVal_ = nullptr;
+    extInVal_ = nullptr;
+    in = nullptr;
+    CHECK_GT(bs_ * ic_ * ih_ * iw_, 0);
+    auto extPD = MKLDNNMatrix::createPrimitiveDesc(
+        {bs_, ic_, ih_, iw_}, mkldnn::memory::format::nchw, engine_);
+    const MatrixPtr& inMat = inputLayers_[0]->getOutputValue();
+    in = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
+    CHECK_EQ(inputIsOnlyMKLDNN(), in != nullptr);
+    if (in == nullptr || in->getFormat() == mkldnn::memory::format::nc) {
+      in = MKLDNNMatrix::create(inMat, extPD);
+    }
+    extInVal_ = isPaddleFormat(in->getFormat()) ? in : nullptr;
+    if (in->getFormat() == mkldnn::memory::format::nc) {
+      CHECK(ih_ == 1 && iw_ == 1);
+    }
+    if (nullptr == intPD || in->getPrimitiveDesc() == *intPD) {
+      return;
+    }
+    // need create reorder
+    in = MKLDNNMatrix::create(nullptr, *intPD);
+    extInVal_ = extInVal_ ? extInVal_ : MKLDNNMatrix::create(inMat, extPD);
+    cvtInVal_ = MKLDNNMatrix::createReorder(extInVal_, in);
+    CHECK(cvtInVal_) << "should not be emptry";
+  }
+
+  /**
+   * reset output value from internal primitive desc.
+   * reset both internal and external buffer and create reorder if necessary.
+   */
+  void resetOutValue(MKLDNNMatrixPtr& out,
+                     mkldnn::memory::primitive_desc intPD) {
+    cvtOutVal_ = nullptr;
+    out = MKLDNNMatrix::create(output_.value, intPD);
+    extOutVal_ = out;
+    if (outputIsOnlyMKLDNN() || isPaddleFormat(extOutVal_->getFormat())) {
+      return;
+    }
+    // need create reorder
+    CHECK_GT(bs_ * oc_ * oh_ * ow_, 0);
+    extOutVal_ = MKLDNNMatrix::create(output_.value,
+                                      {bs_, oc_, oh_, ow_},
+                                      mkldnn::memory::format::nchw,
+                                      engine_);
+    out = MKLDNNMatrix::create(nullptr, intPD);
+    cvtOutVal_ = MKLDNNMatrix::createReorder(out, extOutVal_);
+    CHECK(cvtOutVal_) << "should not be empty";
+  }
+
+  /**
+   * reset input grad from internal primitive desc.
+   * reset both internal and external buffer and create reorder if necessary.
+   */
+  void resetInGrad(MKLDNNMatrixPtr& in, mkldnn::memory::primitive_desc intPD) {
+    cvtInGrad_ = nullptr;
+    extInGrad_ = nullptr;
+    in = nullptr;
+    LayerPtr& input = inputLayers_[0];
+    if (input->getOutputGrad() == nullptr) {
+      // no need input grad
+      return;
+    }
+    CHECK(inputIsOnlyMKLDNN() || input->getOutputMapSize() <= 1)
+        << "only support input is MKLDNN layer or only have one output layer";
+    // when input is a mkldnn branch node,
+    // this layer will save input grad to a internal buffer,
+    // and the mkldnn input layer will merge them to actual prev->output_.grad
+    const MatrixPtr& inMat =
+        input->getOutputMapSize() <= 1 ? input->getOutputGrad() : nullptr;
+    in = MKLDNNMatrix::create(inMat, intPD);
+    Argument& arg = input->getOutput(this->getName());
+    arg.grad = std::dynamic_pointer_cast<Matrix>(in);
+    CHECK(inVal_ != nullptr && inVal_->getPrimitiveDesc() == intPD)
+        << "should have internal input value and primitive desc must equal";
+    if (inputIsOnlyMKLDNN()) {
+      return;
+    }
+
+    extInGrad_ = in;
+    if (isPaddleFormat(extInGrad_->getFormat())) {
+      return;
+    }
+    // need create reorder
+    CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat()))
+        << "should have external input value and the format must be nchw(nc)";
+    extInGrad_ = MKLDNNMatrix::create(inMat, extInVal_->getPrimitiveDesc());
+    CHECK(inVal_ != nullptr && inVal_->getPrimitiveDesc() == intPD)
+        << "should have internal input value and primitive desc must equal";
+    in = MKLDNNMatrix::create(nullptr, intPD);
+    cvtInGrad_ = MKLDNNMatrix::createReorder(in, extInGrad_);
+    CHECK(cvtInGrad_);
+  }
+
+  /**
+   * reset output grad from internal primitive desc.
+   * merge grad if necessary.
+   * reset both internal and external buffer and create reorder if necessary.
+   * note: about merge grad, when this layer has serval outputs,
    *       it could not be mixed with cpu device,
    *       since it can not get memory desc from cpu device.
    */
-  virtual void resetOutGrad(MKLDNNMatrixPtr& out,
-                            mkldnn::memory::primitive_desc pd) {
-    CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet";
+  void resetOutGrad(MKLDNNMatrixPtr& out,
+                    mkldnn::memory::primitive_desc intPD) {
+    cvtOutGrad_ = nullptr;
+    extOutGrad_ = nullptr;
+    out = nullptr;
+    MatrixPtr& outMat = output_.grad;
+    out = MKLDNNMatrix::create(outMat, intPD);
+    resetMergeGrad(out);
+    if (outputIsOnlyMKLDNN()) {
+      return;
+    }
+    CHECK_LE(outputMap_.size(), 1U) << "do not support mixed with cpu device";
+    extOutGrad_ = out;
+    if (isPaddleFormat(extOutGrad_->getFormat())) {
+      return;
+    }
+    // need create reorder
+    CHECK(extOutVal_ != nullptr && isPaddleFormat(extOutVal_->getFormat()))
+        << "should have external output value and the format must be nchw(nc)";
+    extOutGrad_ = MKLDNNMatrix::create(outMat, extOutVal_->getPrimitiveDesc());
+    CHECK(outVal_ != nullptr && outVal_->getPrimitiveDesc() == intPD)
+        << "should have internal output value and primitive desc must equal";
+    out = MKLDNNMatrix::create(nullptr, intPD);
+    cvtOutGrad_ = MKLDNNMatrix::createReorder(extOutGrad_, out);
+    CHECK(cvtOutGrad_);
+  }
+
+  /**
+   * reset the merge grad primitive if necessary.
+   * note: do not support the grads are mixed with cpu device,
+   *       since it can not get memory desc from cpu device.
+   */
+  virtual void resetMergeGrad(MKLDNNMatrixPtr& out) {
     mergeGrad_ = nullptr;
     pipelineMergeGrad_.clear();
-    out = MKLDNNMatrix::create(output_.grad, pd);
-    if (outputMap_.size() <= 1) {
+    if (outputMap_.size() <= 1 || !outputIsOnlyMKLDNN()) {
+      // do not merge when output is not all MKLDNN or only one output
       return;
     }
+    CHECK(out) << "should have reset internal ouput grad";
     std::vector<double> scales(outputMap_.size(), 1.0);
     std::vector<mkldnn::memory::primitive_desc> srcPDs;
     std::vector<mkldnn::primitive::at> srcs;
@@ -309,15 +496,13 @@ protected:
     for (size_t i = 1; i < srcPDs.size(); ++i) {
       CHECK(srcPDs[0] == srcPDs[i]);
     }
-    tmpOutGrad_ = nullptr;
+    tmpOutGrad_ = out;
     tmpCvt_ = nullptr;
     if (out->getPrimitiveDesc() != srcPDs[0]) {
       tmpOutGrad_ = MKLDNNMatrix::create(nullptr, srcPDs[0]);
       tmpCvt_ = MKLDNNMatrix::createReorder(tmpOutGrad_, out);
       CHECK(tmpCvt_);
       pipelineMergeGrad_.push_back(*tmpCvt_);
-    } else {
-      tmpOutGrad_ = out;
     }
 
     auto sumPD = mkldnn::sum::primitive_desc(
@@ -326,21 +511,6 @@ protected:
     pipelineMergeGrad_.insert(pipelineMergeGrad_.begin(), *mergeGrad_);
   }
 
-  /**
-   * reset input grad from primitive desc.
-   * this function is avaiable for input is only mkldnn
-   * or input do not care cpu device
-   */
-  virtual void resetInGrad(MKLDNNMatrixPtr& in,
-                           mkldnn::memory::primitive_desc pd) {
-    LayerPtr& input = inputLayers_[0];
-    const MatrixPtr& grad =
-        input->getOutputMapSize() > 1 ? nullptr : input->getOutput().grad;
-    in = MKLDNNMatrix::create(grad, pd);
-    Argument& arg = input->getOutput(this->getName());
-    arg.grad = std::dynamic_pointer_cast<Matrix>(in);
-  }
-
   /**
    * print info about sizes
    */
@@ -351,22 +521,50 @@ protected:
   }
 
   /**
-   * Print the mkldnn memory format flow of value
+   * print the mkldnn memory format of value
    */
-  virtual void printValueFormatFlow() {
-    if (inVal_ && outVal_) {
-      VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>> "
-                        << outVal_->getFormat();
+  virtual void printValueFormat() {
+    if (extInVal_) {
+      VLOG(MKLDNN_FMTS) << extInVal_->getFormat() << " >>> ";
+    }
+    if (inVal_) {
+      VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>>";
+    }
+    if (outVal_) {
+      VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
+    }
+    if (extOutVal_) {
+      VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
+    }
+    if (wgtVal_) {
+      VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat();
+    }
+    if (biasVal_) {
+      VLOG(MKLDNN_FMTS) << "Bias value format: " << biasVal_->getFormat();
     }
   }
 
   /**
-   * Print the mkldnn memory format flow of grad
+   * print the mkldnn memory format of grad
    */
-  virtual void printGradFormatFlow() {
-    if (inGrad_ && outGrad_) {
-      VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<< "
-                        << outGrad_->getFormat();
+  virtual void printGradFormat() {
+    if (extInGrad_) {
+      VLOG(MKLDNN_FMTS) << extInGrad_->getFormat() << " <<< ";
+    }
+    if (inGrad_) {
+      VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<<";
+    }
+    if (outGrad_) {
+      VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< ";
+    }
+    if (extOutGrad_) {
+      VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
+    }
+    if (wgtGrad_) {
+      VLOG(MKLDNN_FMTS) << "Weight grad format: " << wgtGrad_->getFormat();
+    }
+    if (biasGrad_) {
+      VLOG(MKLDNN_FMTS) << "Bias grad format: " << biasGrad_->getFormat();
    }
  }
@@ -405,6 +603,19 @@ protected:
   void setDevice(int id) { deviceId_ = id; }
 
 private:
+  /**
+   * check the format is nchw or nc,
+   * which is supported by Paddle default memory layout
+   */
+  bool isPaddleFormat(mkldnn::memory::format fmt) {
+    if (fmt == mkldnn::memory::format::nchw ||
+        fmt == mkldnn::memory::format::nc) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+
   /**
    * clear all grad
    */
@@ -449,6 +660,19 @@ private:
     }
   }
 
+  /**
+   * if have cpu device, share value and grad data with output_
+   */
+  void shareCPUDevice() {
+    if (outputIsOnlyMKLDNN()) {
+      return;
+    }
+    for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
+      outputOtherDevice_[i].value = output_.value;
+      outputOtherDevice_[i].grad = output_.grad;
+    }
+  }
+
   /**
    * Check the cpu device number of outputOtherDevice_.
    * should have only one at most.
```
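Of the new base-class helpers, resetMergeGrad is the least obvious: when a layer's output feeds several MKLDNN consumers, each consumer writes its gradient into its own internal buffer, and a mkldnn::sum primitive with unit scales folds them back into one output grad. The standalone sketch below exercises that sum pattern with the MKL-DNN v0.x API that the hunk above uses; sizes and values are illustrative, not from the diff.

```cpp
#include <mkldnn.hpp>
#include <vector>

using namespace mkldnn;

int main() {
  // Two branch gradients with identical primitive descs, merged by sum.
  engine eng(engine::cpu, 0);
  memory::dims dims = {2, 4};  // bs, oc (illustrative)
  auto pd = memory::primitive_desc(
      memory::desc(dims, memory::data_type::f32, memory::format::nc), eng);

  std::vector<float> g0(8, 1.f), g1(8, 2.f), merged(8, 0.f);
  memory src0(pd, g0.data()), src1(pd, g1.data()), dst(pd, merged.data());

  // Unit scales, as in resetMergeGrad's scales(outputMap_.size(), 1.0).
  std::vector<double> scales = {1.0, 1.0};
  std::vector<memory::primitive_desc> srcPDs = {pd, pd};
  auto sumPD =
      sum::primitive_desc(dst.get_primitive_desc().desc(), scales, srcPDs);

  std::vector<primitive::at> srcs = {src0, src1};
  std::vector<primitive> pipeline;
  pipeline.push_back(sum(sumPD, srcs, dst));
  stream(stream::kind::eager).submit(pipeline).wait();
  // merged now holds g0 + g1 elementwise.
  return 0;
}
```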
paddle/gserver/layers/MKLDNNPoolLayer.cpp (+13 −90)

```diff
@@ -85,8 +85,6 @@ void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
   resetFwdPD(fwdPD_, in, out);
 
   resetFwdPipeline(pipeline, fwdPD_, in, out);
-
-  printValueFormatFlow();
 }
 
 void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -101,65 +99,22 @@ void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
   resetBwdPD(pd, in, out);
 
   resetBwdPipeline(pipeline, pd, in, out);
-
-  printGradFormatFlow();
-}
-
-void MKLDNNPoolLayer::updateInputData() {
-  inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
 }
 
 void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
                                       MKLDNNMatrixPtr& out) {
   resetInValue(in);
-
-  resetOutValue(out);
-}
-
-void MKLDNNPoolLayer::resetInValue(MKLDNNMatrixPtr& in) {
-  if (inputIsOnlyMKLDNN()) {
-    const MatrixPtr& dnnIn = getInputValue(0);
-    in = std::dynamic_pointer_cast<MKLDNNMatrix>(dnnIn);
-    CHECK(in) << "Input should be MKLDNNMatrix";
-  } else {
-    CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet";
-    const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE);
-    in = MKLDNNMatrix::create(cpuIn, {bs_, ic_, ih_, iw_}, format::nchw, engine_);
-  }
-}
-
-void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
-  CHECK(inVal_) << "Should reset input value first";
   memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
-  out = MKLDNNMatrix::create(
-      output_.value, outDims, inVal_->getFormat(), engine_);
-
-  // create reorder if output value has cpu device and pd do not match
-  cpuOutVal_ = nullptr;
-  cvtOutVal_ = nullptr;
-  if (!outputIsOnlyMKLDNN()) {
-    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
-    cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
-    if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
-      out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc());
-      cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
-      CHECK(cvtOutVal_) << "should not be emptry";
-    } else {
-      cpuOut->setData(output_.value->getData());
-      cpuOutVal_ = out;
-    }
-    output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
-    return;
-  }
-  output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
+  CHECK(in);
+  auto outPD =
+      MKLDNNMatrix::createPrimitiveDesc(outDims, in->getFormat(), engine_);
+  resetOutValue(out, outPD);
 }
 
 void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
                                  MKLDNNMatrixPtr in,
                                  MKLDNNMatrixPtr out) {
   memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_};
   memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
   memory::dims kernels = memory::dims{fh_, fw_};
   memory::dims strides = memory::dims{sh_, sw_};
   memory::dims padL = memory::dims{ph_, pw_};
@@ -194,58 +149,26 @@ void MKLDNNPoolLayer::resetFwdPipeline(
              ? std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out, *workspace_))
              : std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out));
   pipeline.push_back(*fwd_);
-
-  if (cvtOutVal_) {
-    pipeline.push_back(*cvtOutVal_);
-  }
 }
 
 void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                       MKLDNNMatrixPtr& out) {
-  resetOutGrad(out);
-
-  resetInGrad(in);
-}
-
-void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
-  cpuOutGrad_ = nullptr;
-  cvtOutGrad_ = nullptr;
-  CHECK(outVal_);
-  if (outputIsOnlyMKLDNN()) {
-    MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
-  } else {
-    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
-    // always share the same grad data of CPU output
-    // then the activation can get the right grad from output_.grad
-    output_.grad->setData(cpuOut->getData());
-    cpuOutGrad_ = MKLDNNMatrix::create(
-        cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
-    if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
-      out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
-      cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
-      CHECK(cvtOutGrad_) << "should not be emptry";
-    } else {
-      out = cpuOutGrad_;
-    }
-  }
-}
-
-void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) {
-  in = nullptr;
-  if (inputLayers_[0]->getOutput().grad == nullptr) {
-    return;
-  }
-  CHECK(inVal_);
-  MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
+  CHECK(inVal_ && outVal_);
+  resetOutGrad(out, outVal_->getPrimitiveDesc());
+  resetInGrad(in, inVal_->getPrimitiveDesc());
 }
 
 void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
                                  MKLDNNMatrixPtr& in,
                                  MKLDNNMatrixPtr& out) {
+  pd = nullptr;
+  if (in == nullptr) {
+    return;
+  }
   memory::dims kernels = memory::dims{fh_, fw_};
   memory::dims strides = memory::dims{sh_, sw_};
   memory::dims padL = memory::dims{ph_, pw_};
   memory::dims padR = getPaddingR();
-  CHECK(in);
   CHECK(out);
   auto bwdDesc = pool_bwd::desc(poolAlgo_,
                                 in->getMemoryDesc(),
@@ -263,8 +186,8 @@ void MKLDNNPoolLayer::resetBwdPipeline(
     std::shared_ptr<pool_bwd::primitive_desc>& pd,
     MKLDNNMatrixPtr& in,
     MKLDNNMatrixPtr& out) {
-  if (cvtOutGrad_) {
-    pipeline.push_back(*cvtOutGrad_);
+  if (pd == nullptr) {
+    return;
   }
 
   bwdData_ =
```
paddle/gserver/layers/MKLDNNPoolLayer.h (+0 −13)

```diff
@@ -38,13 +38,6 @@ protected:
   // pooling_avg or pooling_max
   mkldnn::algorithm poolAlgo_;
 
-  // MKLDNNMatrixPtr which should be created from CPU Device
-  MKLDNNMatrixPtr cpuOutVal_;
-  MKLDNNMatrixPtr cpuOutGrad_;
-  // convert handle between CPU device and MKLDNN device
-  std::shared_ptr<mkldnn::reorder> cvtOutVal_;
-  std::shared_ptr<mkldnn::reorder> cvtOutGrad_;
-
   // save forward primitive_desc, which can be used backward
   std::shared_ptr<pool_fwd::primitive_desc> fwdPD_;
   // according to https://github.com/01org/mkl-dnn/blob/master/tests/gtests/
@@ -74,8 +67,6 @@ public:
                 MKLDNNMatrixPtr& bias,
                 MKLDNNMatrixPtr& out) override;
 
-  void updateInputData() override;
-
   void printSizeInfo() override {
     MKLDNNLayer::printSizeInfo();
     VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_
@@ -90,8 +81,6 @@ protected:
    * reset pipeline.
    */
   void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out);
-  void resetInValue(MKLDNNMatrixPtr& in);
-  void resetOutValue(MKLDNNMatrixPtr& out);
   void resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
                   MKLDNNMatrixPtr in,
                   MKLDNNMatrixPtr out);
@@ -106,8 +95,6 @@ protected:
    * reset pipeline.
    */
   void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out);
-  void resetOutGrad(MKLDNNMatrixPtr& out);
-  void resetInGrad(MKLDNNMatrixPtr& in);
   void resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
                   MKLDNNMatrixPtr& in,
                   MKLDNNMatrixPtr& out);
```
paddle/math/MKLDNNMatrix.cpp (+1 −1)

```diff
@@ -46,7 +46,7 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
                                      memory::format fmt,
                                      engine& eg,
                                      mkldnn::memory::data_type dtype) {
-  return create(m, memory::primitive_desc(memory::desc(dims, dtype, fmt), eg));
+  return create(m, createPrimitiveDesc(dims, fmt, eg, dtype));
 }
 
 std::shared_ptr<reorder> MKLDNNMatrix::createReorder(const MKLDNNMatrixPtr& src,
```
paddle/math/MKLDNNMatrix.h (+13 −1)

```diff
@@ -52,12 +52,24 @@ public:
       mkldnn::engine& eg,
       mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32);
 
+  /**
+   * Create primitive descriptor.
+   * default with f32 dtype
+   */
+  static mkldnn::memory::primitive_desc createPrimitiveDesc(
+      const mkldnn::memory::dims dims,
+      const mkldnn::memory::format& fmt,
+      const mkldnn::engine& eg,
+      const mkldnn::memory::data_type& dtype = mkldnn::memory::data_type::f32) {
+    return mkldnn::memory::primitive_desc(memory::desc(dims, dtype, fmt), eg);
+  }
+
   /**
    * Create Memory descriptor.
    * default with any format and f32 dtype
    */
   static mkldnn::memory::desc createMemoryDesc(
-      const mkldnn::memory::dims& dims,
+      const mkldnn::memory::dims dims,
       const mkldnn::memory::format& fmt = mkldnn::memory::format::any,
       const mkldnn::memory::data_type& dtype = mkldnn::memory::data_type::f32) {
     return mkldnn::memory::desc(dims, dtype, fmt);
```
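For reference, the new createPrimitiveDesc helper is a thin wrapper over the two-step construction the old MKLDNNMatrix.cpp code spelled out inline. A minimal sketch of the equivalence against the MKL-DNN v0.x API, with illustrative sizes:

```cpp
#include <mkldnn.hpp>

using namespace mkldnn;

int main() {
  engine eng(engine::cpu, 0);
  memory::dims dims = {128, 10};  // e.g. {bs_, oc_} (illustrative)

  // What the helper wraps: desc(dims, dtype, fmt), then primitive_desc(desc, eng).
  auto pd = memory::primitive_desc(
      memory::desc(dims, memory::data_type::f32, memory::format::nc), eng);
  (void)pd;  // equivalent to MKLDNNMatrix::createPrimitiveDesc(dims, format::nc, eng)
  return 0;
}
```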