Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
abce9eb7
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
abce9eb7
编写于
10月 23, 2017
作者:
T
Tao Luo
提交者:
GitHub
10月 23, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4953 from tensor-tang/merge_grad_gtest
refine the mkldnn logic
上级
c91de280
5c892db6
变更
16
显示空白变更内容
内联
并排
Showing
16 changed file
with
612 addition
and
711 deletion
+612
-711
paddle/gserver/activations/MKLDNNActivation.cpp
paddle/gserver/activations/MKLDNNActivation.cpp
+3
-3
paddle/gserver/layers/MKLDNNBase.h
paddle/gserver/layers/MKLDNNBase.h
+2
-2
paddle/gserver/layers/MKLDNNConvLayer.cpp
paddle/gserver/layers/MKLDNNConvLayer.cpp
+34
-202
paddle/gserver/layers/MKLDNNConvLayer.h
paddle/gserver/layers/MKLDNNConvLayer.h
+0
-66
paddle/gserver/layers/MKLDNNFcLayer.cpp
paddle/gserver/layers/MKLDNNFcLayer.cpp
+25
-81
paddle/gserver/layers/MKLDNNFcLayer.h
paddle/gserver/layers/MKLDNNFcLayer.h
+0
-8
paddle/gserver/layers/MKLDNNLayer.cpp
paddle/gserver/layers/MKLDNNLayer.cpp
+333
-0
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+142
-199
paddle/gserver/layers/MKLDNNPoolLayer.cpp
paddle/gserver/layers/MKLDNNPoolLayer.cpp
+13
-90
paddle/gserver/layers/MKLDNNPoolLayer.h
paddle/gserver/layers/MKLDNNPoolLayer.h
+0
-13
paddle/gserver/tests/MKLDNNTester.cpp
paddle/gserver/tests/MKLDNNTester.cpp
+24
-20
paddle/gserver/tests/MKLDNNTester.h
paddle/gserver/tests/MKLDNNTester.h
+2
-6
paddle/math/MKLDNNMatrix.cpp
paddle/math/MKLDNNMatrix.cpp
+4
-4
paddle/math/MKLDNNMatrix.h
paddle/math/MKLDNNMatrix.h
+16
-3
paddle/trainer/tests/sample_trainer_config_branch_net.conf
paddle/trainer/tests/sample_trainer_config_branch_net.conf
+13
-13
paddle/trainer/tests/sample_trainer_config_simple_net.conf
paddle/trainer/tests/sample_trainer_config_simple_net.conf
+1
-1
未找到文件。
paddle/gserver/activations/MKLDNNActivation.cpp
浏览文件 @
abce9eb7
...
...
@@ -126,7 +126,7 @@ void MKLDNNEltwiseActivation::resetFwd(Argument& act) {
copyInVal_
=
nullptr
;
if
(
act
.
grad
&&
algo
==
algorithm
::
eltwise_tanh
)
{
// tanh need save src input for backward
inVal_
=
MKLDNNMatrix
::
create
(
nullptr
,
val_
->
getPrimitiveDesc
());
inVal_
=
MKLDNNMatrix
::
create
(
val_
->
getPrimitiveDesc
());
copyInVal_
=
std
::
make_shared
<
mkldnn
::
reorder
>
(
*
val_
,
*
inVal_
);
CHECK
(
copyInVal_
)
<<
"should not be emptry"
;
pipelineFwd_
.
push_back
(
*
copyInVal_
);
...
...
@@ -145,7 +145,7 @@ void MKLDNNEltwiseActivation::resetBwd(Argument& act) {
algorithm
algo
=
getAlgo
(
this
->
getName
());
float
alpha
=
getBwdAlpha
();
float
beta
=
getBeta
();
grad_
=
MKLDNNMatrix
::
create
(
act
.
grad
,
val_
->
getPrimitiveDesc
()
);
grad_
=
MKLDNNMatrix
::
create
(
val_
->
getPrimitiveDesc
(),
act
.
grad
);
auto
eng
=
CPUEngine
::
Instance
().
getEngine
();
auto
bwdDesc
=
eltwise_bwd
::
desc
(
algo
,
grad_
->
getMemoryDesc
(),
val_
->
getMemoryDesc
(),
alpha
,
beta
);
...
...
@@ -230,7 +230,7 @@ void MKLDNNActivation::resetFwd(Argument& act) {
int
ic
=
cnt_
/
bs
/
ih
/
iw
;
CHECK_EQ
(
cnt_
,
(
size_t
)
bs
*
ic
*
ih
*
iw
);
val_
=
MKLDNNMatrix
::
create
(
act
.
value
,
{
bs
,
ic
,
ih
,
iw
},
mkldnn
::
memory
::
format
::
nchw
,
*
engine_
);
{
bs
,
ic
,
ih
,
iw
},
mkldnn
::
memory
::
format
::
nchw
,
*
engine_
,
act
.
value
);
CHECK
(
val_
);
val_
->
downSpatial
();
}
...
...
paddle/gserver/layers/MKLDNNBase.h
浏览文件 @
abce9eb7
...
...
@@ -21,8 +21,8 @@ namespace paddle {
typedef
enum
{
MKLDNN_BASE
=
1
,
// basical info of MKLDNN
MKLDNN_TESTS
=
1
,
// gtest info of MKLDNN
MKLDNN_
SIZES
=
2
,
// size
info of MKLDNN
MKLDNN_
FMTS
=
3
,
// format
info of MKLDNN
MKLDNN_
FMTS
=
2
,
// format
info of MKLDNN
MKLDNN_
SIZES
=
3
,
// size
info of MKLDNN
MKLDNN_ALL
=
4
,
// show all info of MKLDNN
}
MKLDNN_LOG_LEVEL
;
...
...
paddle/gserver/layers/MKLDNNConvLayer.cpp
浏览文件 @
abce9eb7
...
...
@@ -116,8 +116,6 @@ void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdBuffers
(
fwdPD_
,
in
,
wgt
,
bias
,
out
);
resetFwdPipeline
(
pipeline
,
fwdPD_
,
in
,
wgt
,
bias
,
out
);
printValueFormatFlow
();
}
void
MKLDNNConvLayer
::
resetBwd
(
std
::
vector
<
primitive
>&
pipeline
,
...
...
@@ -135,12 +133,6 @@ void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdBuffers
(
bwdWgtPD
,
bwdDataPD
,
in
,
wgt
,
bias
,
out
);
resetBwdPipeline
(
pipeline
,
bwdWgtPD
,
bwdDataPD
,
in
,
wgt
,
bias
,
out
);
printGradFormatFlow
();
}
void
MKLDNNConvLayer
::
updateInputData
()
{
cpuInVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
void
MKLDNNConvLayer
::
updateWeights
(
const
UpdateCallback
&
callback
)
{
...
...
@@ -211,11 +203,18 @@ void MKLDNNConvLayer::resetFwdBuffers(
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
pd
);
resetInValue
(
pd
,
in
);
resetInValue
(
in
,
std
::
make_shared
<
memory
::
primitive_desc
>
(
pd
->
src_primitive_desc
()));
resetOutValue
(
out
,
pd
->
dst_primitive_desc
());
resetW
gtBiasValue
(
pd
,
wgt
,
bias
);
resetW
ithMatrix
(
wgt
,
weight_
->
getW
(),
pd
->
weights_primitive_desc
()
);
resetOutValue
(
pd
,
out
);
if
(
biases_
&&
biases_
->
getW
())
{
resetWithMatrix
(
bias
,
biases_
->
getW
(),
pd
->
bias_primitive_desc
());
}
else
{
bias
=
nullptr
;
}
}
void
MKLDNNConvLayer
::
resetFwdPipeline
(
...
...
@@ -225,104 +224,12 @@ void MKLDNNConvLayer::resetFwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
if
(
cvtInVal_
)
{
pipeline
.
push_back
(
*
cvtInVal_
);
}
if
(
bias
)
{
fwd_
.
reset
(
new
conv_fwd
(
*
pd
,
*
in
,
*
wgt
,
*
bias
,
*
out
));
}
else
{
fwd_
.
reset
(
new
conv_fwd
(
*
pd
,
*
in
,
*
wgt
,
*
out
));
}
pipeline
.
push_back
(
*
fwd_
);
if
(
cvtOutVal_
)
{
pipeline
.
push_back
(
*
cvtOutVal_
);
}
}
void
MKLDNNConvLayer
::
resetInValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
)
{
const
MatrixPtr
&
inMat
=
inputLayers_
[
0
]
->
getOutputValue
();
in
=
MKLDNNMatrix
::
create
(
inMat
,
pd
->
src_primitive_desc
());
// create buffer and reorder if input value do not match
cpuInVal_
=
nullptr
;
cvtInVal_
=
nullptr
;
MKLDNNMatrixPtr
dnnIn
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
inMat
);
CHECK_EQ
(
inputIsOnlyMKLDNN
(),
dnnIn
!=
nullptr
);
if
(
dnnIn
!=
nullptr
&&
dnnIn
->
getPrimitiveDesc
()
==
in
->
getPrimitiveDesc
())
{
in
=
dnnIn
;
return
;
}
if
(
dnnIn
)
{
if
(
dnnIn
->
getFormat
()
==
format
::
nc
)
{
CHECK
(
ih_
==
1
&&
iw_
==
1
)
<<
"when input is nc format"
;
// create a new one with nchw format and same data
memory
::
dims
inDims
=
memory
::
dims
{
bs_
,
ic_
,
1
,
1
};
dnnIn
=
MKLDNNMatrix
::
create
(
inMat
,
inDims
,
format
::
nchw
,
engine_
);
}
if
(
dnnIn
->
getPrimitiveDesc
()
==
in
->
getPrimitiveDesc
())
{
in
=
dnnIn
;
return
;
}
cpuInVal_
=
dnnIn
;
in
=
MKLDNNMatrix
::
create
(
nullptr
,
pd
->
src_primitive_desc
());
cvtInVal_
=
MKLDNNMatrix
::
createReorder
(
cpuInVal_
,
in
);
CHECK
(
cvtInVal_
)
<<
"should not be emptry"
;
}
else
{
memory
::
dims
inDims
=
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
};
cpuInVal_
=
MKLDNNMatrix
::
create
(
inMat
,
inDims
,
format
::
nchw
,
engine_
);
if
(
cpuInVal_
->
getPrimitiveDesc
()
!=
in
->
getPrimitiveDesc
())
{
// create new mkldnn matrix
in
=
MKLDNNMatrix
::
create
(
nullptr
,
pd
->
src_primitive_desc
());
cvtInVal_
=
MKLDNNMatrix
::
createReorder
(
cpuInVal_
,
in
);
CHECK
(
cvtInVal_
)
<<
"should not be emptry"
;
}
else
{
in
=
cpuInVal_
;
}
}
}
void
MKLDNNConvLayer
::
resetWgtBiasValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getW
(),
pd
->
weights_primitive_desc
());
VLOG
(
MKLDNN_FMTS
)
<<
"Weight value format: "
<<
wgt
->
getFormat
();
bias
=
(
biases_
&&
biases_
->
getW
())
?
MKLDNNMatrix
::
create
(
biases_
->
getW
(),
pd
->
bias_primitive_desc
())
:
nullptr
;
}
void
MKLDNNConvLayer
::
resetOutValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
out
)
{
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
pd
->
dst_primitive_desc
());
// create reorder if output value has cpu device and pd do not match
cpuOutVal_
=
nullptr
;
cvtOutVal_
=
nullptr
;
if
(
!
outputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
value
;
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
cpuOutVal_
=
MKLDNNMatrix
::
create
(
cpuOut
,
outDims
,
format
::
nchw
,
engine_
);
if
(
cpuOutVal_
->
getPrimitiveDesc
()
!=
pd
->
dst_primitive_desc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
pd
->
dst_primitive_desc
());
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
cpuOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be empty"
;
}
else
{
cpuOut
->
setData
(
output_
.
value
->
getData
());
cpuOutVal_
=
out
;
}
// when output is cpu device, change the mkldnn output value and make them
// share the same data. Then if next layer use inputlayer->getOuputValue()
// to achieve the input value, it will get the right data.
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
cpuOutVal_
);
return
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
}
void
MKLDNNConvLayer
::
resetBwdWgtPD
(
...
...
@@ -331,8 +238,8 @@ void MKLDNNConvLayer::resetBwdWgtPD(
loadConvSettings
(
wgtDims
,
biasDims
,
strides
,
dilations
,
padL
,
padR
);
// create backward weight using input, output and weight value memory desc
CHECK
(
inVal_
)
<<
"Should have input value"
;
CHECK
(
outVal_
)
<<
"Should have output value"
;
CHECK
(
inVal_
)
<<
"Should have in
ternal in
put value"
;
CHECK
(
outVal_
)
<<
"Should have
internal
output value"
;
CHECK
(
wgtVal_
)
<<
"Should have weight value"
;
algorithm
algo
=
algorithm
::
convolution_direct
;
padding_kind
padKind
=
padding_kind
::
zero
;
...
...
@@ -372,8 +279,8 @@ void MKLDNNConvLayer::resetBwdDataPD(
memory
::
dims
wgtDims
,
biasDims
,
strides
,
dilations
,
padL
,
padR
;
loadConvSettings
(
wgtDims
,
biasDims
,
strides
,
dilations
,
padL
,
padR
);
CHECK
(
inVal_
)
<<
"Should have input value"
;
CHECK
(
outVal_
)
<<
"Should have output value"
;
CHECK
(
inVal_
)
<<
"Should have in
ternal in
put value"
;
CHECK
(
outVal_
)
<<
"Should have
internal
output value"
;
// create backward data using input and output value memory desc
// but using weight memory desc with any format
auto
bwdDataDesc
=
conv_bwdData
::
desc
(
algorithm
::
convolution_direct
,
...
...
@@ -399,12 +306,27 @@ void MKLDNNConvLayer::resetBwdBuffers(
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
wgtPD
);
resetOutGrad
(
wgtPD
,
out
);
resetOutGrad
(
out
,
wgtPD
->
diff_dst_primitive_desc
()
);
resetWgtBiasGrad
(
wgtPD
,
wgt
,
bias
);
resetWithMatrix
(
wgt
,
weight_
->
getWGrad
(),
wgtPD
->
diff_weights_primitive_desc
());
CHECK
(
wgtVal_
!=
nullptr
&&
wgt
->
getPrimitiveDesc
()
==
wgtVal_
->
getPrimitiveDesc
())
<<
"primitive desc of weight grad and value should be equal"
;
resetInGrad
(
dataPD
,
in
);
bias
=
nullptr
;
if
(
biases_
&&
biases_
->
getWGrad
())
{
resetWithMatrix
(
bias
,
biases_
->
getWGrad
(),
wgtPD
->
diff_bias_primitive_desc
());
CHECK
(
bias
&&
biasVal_
&&
bias
->
getPrimitiveDesc
()
==
biasVal_
->
getPrimitiveDesc
())
<<
"primitive desc of bias grad should equal the bias value"
;
}
if
(
dataPD
==
nullptr
)
{
return
;
}
resetInGrad
(
in
,
dataPD
->
diff_src_primitive_desc
());
resetWgtValBwdData
(
dataPD
,
wgtValBwdData_
);
}
...
...
@@ -416,10 +338,7 @@ void MKLDNNConvLayer::resetBwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
if
(
cvtOutGrad_
)
{
pipeline
.
push_back
(
*
cvtOutGrad_
);
}
CHECK
(
inVal_
);
// add bwdWgt handle
if
(
bias
)
{
bwdWgt_
.
reset
(
new
conv_bwdWgt
(
*
wgtPD
,
*
inVal_
,
*
out
,
*
wgt
,
*
bias
));
...
...
@@ -431,99 +350,13 @@ void MKLDNNConvLayer::resetBwdPipeline(
if
(
dataPD
==
nullptr
)
{
return
;
}
if
(
cvtWgtVal_
)
{
pipeline
.
push_back
(
*
cvtWgtVal_
);
}
// add bwdData handle
CHECK
(
wgtValBwdData_
)
<<
"Should have weight memory"
;
bwdData_
.
reset
(
new
conv_bwdData
(
*
dataPD
,
*
out
,
*
wgtValBwdData_
,
*
in
));
pipeline
.
push_back
(
*
bwdData_
);
if
(
cvtInGrad_
)
{
pipeline
.
push_back
(
*
cvtInGrad_
);
}
}
void
MKLDNNConvLayer
::
resetOutGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
out
)
{
cpuOutGrad_
=
nullptr
;
cvtOutGrad_
=
nullptr
;
CHECK
(
outVal_
!=
nullptr
&&
outVal_
->
getPrimitiveDesc
()
==
wgtPD
->
diff_dst_primitive_desc
())
<<
"primitive desc of out grad and value should be equal"
;
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
// always share the same grad data of CPU output
// then the activation can get the right grad from output_.grad
output_
.
grad
->
setData
(
cpuOut
->
getData
());
// same PrimitiveDesc with cpuInVal_
CHECK
(
cpuOutVal_
);
cpuOutGrad_
=
MKLDNNMatrix
::
create
(
cpuOut
,
cpuOutVal_
->
getPrimitiveDesc
());
// create reorder if primitive desc does not match
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
!=
outVal_
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
outVal_
->
getPrimitiveDesc
());
cvtOutGrad_
=
MKLDNNMatrix
::
createReorder
(
cpuOutGrad_
,
out
);
CHECK
(
cvtOutGrad_
);
}
else
{
out
=
cpuOutGrad_
;
}
}
}
void
MKLDNNConvLayer
::
resetWgtBiasGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getWGrad
(),
wgtPD
->
diff_weights_primitive_desc
());
CHECK
(
nullptr
!=
wgtVal_
&&
wgt
->
getPrimitiveDesc
()
==
wgtVal_
->
getPrimitiveDesc
())
<<
"primitive desc of weight grad and value should be equal"
;
VLOG
(
MKLDNN_FMTS
)
<<
"weight grad format: "
<<
wgt
->
getFormat
();
bias
=
nullptr
;
if
(
biasVal_
==
nullptr
)
{
return
;
}
bias
=
MKLDNNMatrix
::
create
(
biases_
->
getWGrad
(),
wgtPD
->
diff_bias_primitive_desc
());
CHECK
(
bias
->
getPrimitiveDesc
()
==
biasVal_
->
getPrimitiveDesc
())
<<
"primitive desc of bias grad should equal the bias value"
;
}
void
MKLDNNConvLayer
::
resetInGrad
(
std
::
shared_ptr
<
conv_bwdData
::
primitive_desc
>&
dataPD
,
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
cpuInGrad_
=
nullptr
;
cvtInGrad_
=
nullptr
;
if
(
dataPD
==
nullptr
)
{
return
;
}
if
(
inputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetInGrad
(
in
,
dataPD
->
diff_src_primitive_desc
());
CHECK
(
nullptr
!=
inVal_
&&
in
->
getPrimitiveDesc
()
==
inVal_
->
getPrimitiveDesc
())
<<
"primitive desc of input grad and value should be equal"
;
}
else
{
const
MatrixPtr
&
cpuIn
=
getInputGrad
(
0
,
CPU_DEVICE
);
// same PrimitiveDesc with cpuInVal_
CHECK
(
cpuInVal_
);
cpuInGrad_
=
MKLDNNMatrix
::
create
(
cpuIn
,
cpuInVal_
->
getPrimitiveDesc
());
in
=
cpuInGrad_
;
// create reorder if PrimitiveDesc does not match
if
(
cpuInGrad_
->
getPrimitiveDesc
()
!=
dataPD
->
diff_src_primitive_desc
())
{
in
=
MKLDNNMatrix
::
create
(
getInputGrad
(
0
,
MKLDNN_DEVICE
),
dataPD
->
diff_src_primitive_desc
());
cvtInGrad_
=
MKLDNNMatrix
::
createReorder
(
in
,
cpuInGrad_
);
CHECK
(
cvtInGrad_
);
}
}
}
void
MKLDNNConvLayer
::
resetWgtValBwdData
(
...
...
@@ -537,8 +370,7 @@ void MKLDNNConvLayer::resetWgtValBwdData(
// since the primitive_desc would be different with wgtVal_
CHECK
(
wgtVal_
)
<<
"should have weight value"
;
if
(
dataPD
->
weights_primitive_desc
()
!=
wgtVal_
->
getPrimitiveDesc
())
{
wgtValBwdData_
=
MKLDNNMatrix
::
create
(
nullptr
,
dataPD
->
weights_primitive_desc
());
wgtValBwdData_
=
MKLDNNMatrix
::
create
(
dataPD
->
weights_primitive_desc
());
cvtWgtVal_
=
MKLDNNMatrix
::
createReorder
(
wgtVal_
,
wgtValBwdData_
);
CHECK
(
cvtWgtVal_
);
}
else
{
...
...
paddle/gserver/layers/MKLDNNConvLayer.h
浏览文件 @
abce9eb7
...
...
@@ -48,17 +48,6 @@ protected:
// save forward primitive_desc, which can be used backward
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>
fwdPD_
;
// MKLDNNMatrixPtr which should be created from CPU Device
MKLDNNMatrixPtr
cpuInVal_
;
MKLDNNMatrixPtr
cpuInGrad_
;
MKLDNNMatrixPtr
cpuOutVal_
;
MKLDNNMatrixPtr
cpuOutGrad_
;
// convert handle between CPU device and MKLDNN device
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInGrad_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutGrad_
;
// whether the weight has been init
bool
hasInitedWgt_
;
...
...
@@ -94,8 +83,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
updateInputData
()
override
;
void
updateWeights
(
const
UpdateCallback
&
callback
)
override
;
void
convertWeightsFromPaddle
()
override
;
...
...
@@ -109,26 +96,6 @@ public:
<<
", sw: "
<<
sw_
<<
", dh: "
<<
dh_
<<
", dw: "
<<
dw_
;
}
void
printValueFormatFlow
()
override
{
if
(
cpuInVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
cpuInVal_
->
getFormat
()
<<
" >>>"
;
}
MKLDNNLayer
::
printValueFormatFlow
();
if
(
cpuOutVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
" >>> "
<<
cpuOutVal_
->
getFormat
();
}
}
void
printGradFormatFlow
()
override
{
if
(
cpuInGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
cpuInGrad_
->
getFormat
()
<<
" <<<"
;
}
MKLDNNLayer
::
printGradFormatFlow
();
if
(
cpuOutGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
" <<< "
<<
cpuOutGrad_
->
getFormat
();
}
}
protected:
/**
* load the dims settings of this conv
...
...
@@ -162,23 +129,6 @@ protected:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
/**
* reset MKLDNNMatrix of input value
*/
void
resetInValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
);
/**
* reset MKLDNNMatrix of weight and bias value
*/
void
resetWgtBiasValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
/**
* reset MKLDNNMatrix of output value
*/
void
resetOutValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
out
);
/**
* reset the backward weight primitive descriptor.
*/
...
...
@@ -207,22 +157,6 @@ protected:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
/**
* reset MKLDNNMatrix of output grad
*/
void
resetOutGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
out
);
/**
* reset MKLDNNMatrix of weight and bias grad
*/
void
resetWgtBiasGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
/**
* reset MKLDNNMatrix of input grad
*/
void
resetInGrad
(
std
::
shared_ptr
<
conv_bwdData
::
primitive_desc
>&
dataPD
,
MKLDNNMatrixPtr
&
in
);
/**
* reset MKLDNNMatrix of weight value for backward data
* since the primitive_desc would be different with wgtVal_
...
...
paddle/gserver/layers/MKLDNNFcLayer.cpp
浏览文件 @
abce9eb7
...
...
@@ -62,7 +62,7 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() {
CHECK
(
wgtVal_
)
<<
"should have been initialized"
;
bool
hasNoSpatial_
=
ih_
==
1
&&
iw_
==
1
;
auto
targetDim
=
wgtVal_
->
getDims
();
auto
srcFmt
=
hasNoSpatial_
?
memory
::
format
::
io
:
memory
::
format
::
ihwo
;
auto
srcFmt
=
hasNoSpatial_
?
format
::
io
:
format
::
ihwo
;
wgtVal_
->
reorderDataFrom
(
wgtVal_
,
srcFmt
,
targetDim
);
hasInitedWgt_
=
true
;
}
...
...
@@ -71,7 +71,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
CHECK
(
wgtVal_
)
<<
"should have been initialized"
;
bool
hasNoSpatial_
=
ih_
==
1
&&
iw_
==
1
;
auto
targetDim
=
wgtVal_
->
getDims
();
auto
dstFmt
=
hasNoSpatial_
?
memory
::
format
::
io
:
memory
::
format
::
ihwo
;
auto
dstFmt
=
hasNoSpatial_
?
format
::
io
:
format
::
ihwo
;
wgtVal_
->
reorderDataTo
(
wgtVal_
,
dstFmt
,
targetDim
);
}
...
...
@@ -100,8 +100,6 @@ void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdPD
(
fwdPD_
,
in
,
wgt
,
bias
,
out
);
resetFwdPipeline
(
pipeline
,
fwdPD_
,
in
,
wgt
,
bias
,
out
);
printValueFormatFlow
();
}
void
MKLDNNFcLayer
::
resetBwd
(
std
::
vector
<
primitive
>&
pipeline
,
...
...
@@ -119,12 +117,6 @@ void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdDataPD
(
bwdDataPD
,
in
,
out
);
resetBwdPipeline
(
pipeline
,
bwdWgtPD
,
bwdDataPD
,
in
,
wgt
,
bias
,
out
);
printGradFormatFlow
();
}
void
MKLDNNFcLayer
::
updateInputData
()
{
inVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
void
MKLDNNFcLayer
::
updateWeights
(
const
UpdateCallback
&
callback
)
{
...
...
@@ -139,51 +131,30 @@ void MKLDNNFcLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
resetInValue
(
in
);
resetWgtBiasValue
(
wgt
,
bias
);
resetOutValue
(
out
);
}
void
MKLDNNFcLayer
::
resetInValue
(
MKLDNNMatrixPtr
&
in
)
{
if
(
inputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
dnnIn
=
getInputValue
(
0
);
in
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
dnnIn
);
CHECK
(
in
)
<<
"Input should be MKLDNNMatrix"
;
}
else
{
CHECK_EQ
(
getPrev
(
0
)
->
getDeviceId
(),
CPU_DEVICE
)
<<
"Only support CPU yet"
;
const
MatrixPtr
&
cpuIn
=
getInputValue
(
0
,
CPU_DEVICE
);
in
=
MKLDNNMatrix
::
create
(
cpuIn
,
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
}
CHECK
(
in
);
in
->
downSpatial
();
}
void
MKLDNNFcLayer
::
resetWgtBiasValue
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
auto
outPD
=
MKLDNNMatrix
::
createPrimitiveDesc
({
bs_
,
oc_
},
format
::
nc
,
engine_
);
resetOutValue
(
out
,
outPD
);
format
wgtFmt
=
format
::
oihw
;
if
(
in
Val_
->
getFormat
()
==
format
::
nChw8c
)
{
if
(
in
->
getFormat
()
==
format
::
nChw8c
)
{
wgtFmt
=
format
::
oIhw8i
;
}
else
if
(
in
Val_
->
getFormat
()
==
format
::
nChw16c
)
{
}
else
if
(
in
->
getFormat
()
==
format
::
nChw16c
)
{
wgtFmt
=
format
::
oIhw16i
;
}
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getW
(),
{
oc_
,
ic_
,
ih_
,
iw_
},
wgtFmt
,
engine_
);
auto
wgtPD
=
MKLDNNMatrix
::
createPrimitiveDesc
({
oc_
,
ic_
,
ih_
,
iw_
},
wgtFmt
,
engine_
);
resetWithMatrix
(
wgt
,
weight_
->
getW
(),
wgtPD
);
wgt
->
downSpatial
();
VLOG
(
MKLDNN_FMTS
)
<<
"Weight value format: "
<<
wgt
->
getFormat
();
bias
=
(
biases_
&&
biases_
->
getW
())
?
MKLDNNMatrix
::
create
(
biases_
->
getW
(),
{
oc_
},
format
::
x
,
engine_
)
:
nullptr
;
}
void
MKLDNNFcLayer
::
resetOutValue
(
MKLDNNMatrixPtr
&
out
)
{
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
if
(
!
outputIsOnlyMKLDNN
())
{
// fc cpu output value do not need create convert, just share data
getOutput
(
CPU_DEVICE
).
value
->
setData
(
out
->
getData
())
;
if
(
biases_
&&
biases_
->
getW
()
)
{
auto
biasPD
=
MKLDNNMatrix
::
createPrimitiveDesc
({
oc_
},
format
::
x
,
engine_
);
resetWithMatrix
(
bias
,
biases_
->
getW
(),
biasPD
);
}
else
{
bias
=
nullptr
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
}
void
MKLDNNFcLayer
::
resetFwdPD
(
std
::
shared_ptr
<
fc_fwd
::
primitive_desc
>&
pd
,
...
...
@@ -219,7 +190,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
}
else
{
fwd_
.
reset
(
new
fc_fwd
(
*
pd
,
*
in
,
*
wgt
,
*
out
));
}
pipeline
.
push_back
(
*
fwd_
);
}
...
...
@@ -227,44 +197,18 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
resetOutGrad
(
out
);
resetWgtBiasGrad
(
wgt
,
bias
);
resetInGrad
(
in
);
}
void
MKLDNNFcLayer
::
resetOutGrad
(
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
outVal_
);
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
output_
.
grad
->
setData
(
cpuOut
->
getData
());
out
=
MKLDNNMatrix
::
create
(
cpuOut
,
outVal_
->
getPrimitiveDesc
());
}
}
CHECK
(
inVal_
&&
outVal_
);
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
void
MKLDNNFcLayer
::
resetWgtBiasGrad
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
CHECK
(
wgtVal_
);
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getWGrad
(),
wgtVal_
->
getPrimitiveDesc
());
resetWithMatrix
(
wgt
,
weight_
->
getWGrad
(),
wgtVal_
->
getPrimitiveDesc
());
if
(
biasVal_
)
{
resetWithMatrix
(
bias
,
biases_
->
getWGrad
(),
biasVal_
->
getPrimitiveDesc
());
}
else
{
bias
=
nullptr
;
if
(
biasVal_
==
nullptr
)
{
return
;
}
bias
=
MKLDNNMatrix
::
create
(
biases_
->
getWGrad
(),
biasVal_
->
getPrimitiveDesc
());
}
void
MKLDNNFcLayer
::
resetInGrad
(
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
if
(
inputLayers_
[
0
]
->
getOutput
().
grad
==
nullptr
)
{
return
;
}
CHECK
(
inVal_
);
MKLDNNLayer
::
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
}
void
MKLDNNFcLayer
::
resetBwdWgtPD
(
...
...
paddle/gserver/layers/MKLDNNFcLayer.h
浏览文件 @
abce9eb7
...
...
@@ -66,8 +66,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
updateInputData
()
override
;
void
updateWeights
(
const
UpdateCallback
&
callback
)
override
;
void
convertWeightsFromPaddle
()
override
;
...
...
@@ -84,9 +82,6 @@ protected:
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
void
resetInValue
(
MKLDNNMatrixPtr
&
in
);
void
resetWgtBiasValue
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
void
resetOutValue
(
MKLDNNMatrixPtr
&
out
);
void
resetFwdPD
(
std
::
shared_ptr
<
fc_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
in
,
MKLDNNMatrixPtr
wgt
,
...
...
@@ -109,9 +104,6 @@ protected:
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
);
void
resetWgtBiasGrad
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
);
void
resetBwdWgtPD
(
std
::
shared_ptr
<
fc_bwdWgt
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
...
...
paddle/gserver/layers/MKLDNNLayer.cpp
0 → 100644
浏览文件 @
abce9eb7
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNLayer.h"
using
namespace
mkldnn
;
// NOLINT
typedef
memory
::
format
format
;
namespace
paddle
{
bool
MKLDNNLayer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
<<
"Please set WITH_MKLDNN=ON "
<<
"and set use_mkldnn=True"
;
CHECK
(
!
useGpu_
)
<<
"Do not support GPU yet"
;
// set device id before Layer::init
setDevice
(
MKLDNN_DEVICE
);
// change param device to MKLDNN device
setParamsDevice
(
MKLDNN_DEVICE
,
parameterMap
);
if
(
!
Layer
::
init
(
layerMap
,
parameterMap
))
{
return
false
;
}
setOutputMap
();
checkCPUOutputsNumber
();
stream_
.
reset
(
new
MKLDNNStream
());
engine_
=
CPUEngine
::
Instance
().
getEngine
();
return
true
;
}
void
MKLDNNLayer
::
forward
(
PassType
passType
)
{
passType_
=
passType
;
{
REGISTER_TIMER_INFO
(
"mkldnn_FwdTimer"
,
getName
().
c_str
());
CHECK
(
!
inputLayers_
.
empty
());
copySeqInfoToOutputs
();
size_t
elemenCnt
=
inputLayers_
[
0
]
->
getOutputValue
()
->
getElementCnt
();
if
(
inputElemenCnt_
!=
elemenCnt
)
{
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn forward"
;
// reset when input total sizes changed, not only the batchsize
inputElemenCnt_
=
elemenCnt
;
pipelineFwd_
.
clear
();
reshape
(
bs_
,
ic_
,
ih_
,
iw_
,
oc_
,
oh_
,
ow_
);
// all cpu device output grad or value share output's
shareCPUDevice
();
resetFwd
(
pipelineFwd_
,
inVal_
,
wgtVal_
,
biasVal_
,
outVal_
);
// MKLDNNLayer output value should be MKLDNNMatrix
// so external output value is necessary.
// Then external input value is not necessary,
// since input may be mkldnn internal buffer.
CHECK
(
extOutVal_
)
<<
"external output value is necessary"
;
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
extOutVal_
);
CHECK
(
inVal_
&&
outVal_
)
<<
"internal memories are necessary"
;
if
(
cvtInVal_
)
{
pipelineFwd_
.
insert
(
pipelineFwd_
.
begin
(),
*
cvtInVal_
);
}
if
(
cvtOutVal_
)
{
pipelineFwd_
.
push_back
(
*
cvtOutVal_
);
}
convertWeightsFromPaddle
();
printSizeInfo
();
printValueFormat
();
needResetBwd_
=
true
;
}
if
(
inputLayers_
[
0
]
->
getType
()
==
"data"
)
{
// Update input value data when input layer is "data" type,
// since the input value data address might be changed.
CHECK
(
extInVal_
);
extInVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
if
(
!
outputOnlyMKLDNN_
)
{
clearGrads
();
}
stream_
->
submit
(
pipelineFwd_
);
}
{
REGISTER_TIMER_INFO
(
"FwActTimer"
,
getName
().
c_str
());
forwardActivation
();
}
}
void
MKLDNNLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
if
(
needResetBwd_
)
{
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn backward"
;
pipelineBwd_
.
clear
();
pipelineMergeGrad_
.
clear
();
mergeGrad_
=
nullptr
;
resetBwd
(
pipelineBwd_
,
inGrad_
,
wgtGrad_
,
biasGrad_
,
outGrad_
);
// external output grad is not necessary
// since output may be mkldnn internal buffer or merge them directly.
CHECK
(
outGrad_
)
<<
"internal output grad is necessary"
;
if
(
extOutGrad_
)
{
CHECK_EQ
(
extOutGrad_
->
getData
(),
output_
.
grad
->
getData
())
<<
"the external buffer should share the same data with output_.grad"
;
}
if
(
cvtOutGrad_
)
{
pipelineBwd_
.
insert
(
pipelineBwd_
.
begin
(),
*
cvtOutGrad_
);
}
if
(
cvtInGrad_
)
{
pipelineBwd_
.
push_back
(
*
cvtInGrad_
);
}
printGradFormat
();
needResetBwd_
=
false
;
}
// merge grad must before backward activation
if
(
mergeGrad_
)
{
REGISTER_TIMER_INFO
(
"MergeBpGrad"
,
getName
().
c_str
());
stream_
->
submit
(
pipelineMergeGrad_
);
}
{
REGISTER_TIMER_INFO
(
"BpActTimer"
,
getName
().
c_str
());
backwardActivation
();
}
{
REGISTER_TIMER_INFO
(
"mkldnn_bwdTimer"
,
getName
().
c_str
());
stream_
->
submit
(
pipelineBwd_
);
}
{
REGISTER_TIMER_INFO
(
"WeightUpdate"
,
getName
().
c_str
());
updateWeights
(
callback
);
}
}
void
MKLDNNLayer
::
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
)
{
const
Argument
&
input
=
inputLayers_
[
0
]
->
getOutput
();
batchsize
=
input
.
getBatchSize
();
int
h
=
input
.
getFrameHeight
();
int
w
=
input
.
getFrameWidth
();
if
(
h
!=
0
)
{
height
=
h
;
}
if
(
w
!=
0
)
{
width
=
w
;
}
}
void
MKLDNNLayer
::
reshapeOutput
(
size_t
height
,
size_t
width
)
{
output_
.
setFrameHeight
(
height
);
output_
.
setFrameWidth
(
width
);
for
(
size_t
i
=
0
;
i
<
outputOtherDevice_
.
size
();
i
++
)
{
outputOtherDevice_
[
i
].
setFrameHeight
(
height
);
outputOtherDevice_
[
i
].
setFrameWidth
(
width
);
}
}
void
MKLDNNLayer
::
resetWithMatrix
(
MKLDNNMatrixPtr
&
dnn
,
const
MatrixPtr
&
mat
,
memory
::
primitive_desc
pd
)
{
dnn
=
nullptr
;
if
(
mat
==
nullptr
)
{
return
;
}
dnn
=
MKLDNNMatrix
::
create
(
pd
,
mat
);
}
void
MKLDNNLayer
::
resetInValue
(
MKLDNNMatrixPtr
&
in
,
const
std
::
shared_ptr
<
memory
::
primitive_desc
>&
intPD
)
{
cvtInVal_
=
nullptr
;
extInVal_
=
nullptr
;
in
=
nullptr
;
CHECK_GT
(
bs_
*
ic_
*
ih_
*
iw_
,
0
);
auto
extPD
=
MKLDNNMatrix
::
createPrimitiveDesc
(
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
const
MatrixPtr
&
inMat
=
inputLayers_
[
0
]
->
getOutputValue
();
in
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
inMat
);
CHECK_EQ
(
inputIsOnlyMKLDNN
(),
in
!=
nullptr
);
if
(
in
==
nullptr
||
in
->
getFormat
()
==
format
::
nc
)
{
in
=
MKLDNNMatrix
::
create
(
extPD
,
inMat
);
}
extInVal_
=
isPaddleFormat
(
in
->
getFormat
())
?
in
:
nullptr
;
if
(
in
->
getFormat
()
==
format
::
nc
)
{
CHECK
(
ih_
==
1
&&
iw_
==
1
);
}
if
(
nullptr
==
intPD
||
in
->
getPrimitiveDesc
()
==
*
intPD
)
{
return
;
}
// need create reorder
in
=
MKLDNNMatrix
::
create
(
*
intPD
);
extInVal_
=
extInVal_
?
extInVal_
:
MKLDNNMatrix
::
create
(
extPD
,
inMat
);
cvtInVal_
=
MKLDNNMatrix
::
createReorder
(
extInVal_
,
in
);
CHECK
(
cvtInVal_
)
<<
"should not be emptry"
;
}
void
MKLDNNLayer
::
resetOutValue
(
MKLDNNMatrixPtr
&
out
,
memory
::
primitive_desc
intPD
)
{
cvtOutVal_
=
nullptr
;
out
=
MKLDNNMatrix
::
create
(
intPD
,
output_
.
value
);
extOutVal_
=
out
;
if
(
outputIsOnlyMKLDNN
()
||
isPaddleFormat
(
extOutVal_
->
getFormat
()))
{
return
;
}
// need create reorder
CHECK_GT
(
bs_
*
oc_
*
oh_
*
ow_
,
0
);
extOutVal_
=
MKLDNNMatrix
::
create
(
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
},
format
::
nchw
,
engine_
,
output_
.
value
);
out
=
MKLDNNMatrix
::
create
(
intPD
);
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
extOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be empty"
;
}
void
MKLDNNLayer
::
resetInGrad
(
MKLDNNMatrixPtr
&
in
,
memory
::
primitive_desc
intPD
)
{
cvtInGrad_
=
nullptr
;
extInGrad_
=
nullptr
;
in
=
nullptr
;
LayerPtr
&
input
=
inputLayers_
[
0
];
if
(
input
->
getOutputGrad
()
==
nullptr
)
{
// no need input grad
return
;
}
CHECK
(
inputIsOnlyMKLDNN
()
||
input
->
getOutputMapSize
()
<=
1
)
<<
"only support input is MKLDNN layer or only have one output layer"
;
// when input is a mkldnn branch node,
// this layer will save input grad to a internal buffer,
// and the mkldnn input layer will merge them to actual prev->output_.grad
const
MatrixPtr
&
inMat
=
input
->
getOutputMapSize
()
<=
1
?
input
->
getOutputGrad
()
:
nullptr
;
in
=
MKLDNNMatrix
::
create
(
intPD
,
inMat
);
Argument
&
arg
=
input
->
getOutput
(
this
->
getName
());
arg
.
grad
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
in
);
CHECK
(
inVal_
);
CHECK
(
inVal_
->
getPrimitiveDesc
()
==
intPD
)
<<
"the primitive desc must equal"
;
if
(
inputIsOnlyMKLDNN
())
{
return
;
}
extInGrad_
=
in
;
if
(
isPaddleFormat
(
extInGrad_
->
getFormat
()))
{
return
;
}
// need create reorder
// TODO(TJ): add macro definition to simplify it
CHECK
(
extInVal_
!=
nullptr
&&
isPaddleFormat
(
extInVal_
->
getFormat
()))
<<
"should have external input value and the format must be nchw(nc)"
;
extInGrad_
=
MKLDNNMatrix
::
create
(
extInVal_
->
getPrimitiveDesc
(),
inMat
);
CHECK
(
inVal_
!=
nullptr
&&
inVal_
->
getPrimitiveDesc
()
==
intPD
)
<<
"should have internal input value and primitive desc must equal"
;
in
=
MKLDNNMatrix
::
create
(
intPD
);
cvtInGrad_
=
MKLDNNMatrix
::
createReorder
(
in
,
extInGrad_
);
CHECK
(
cvtInGrad_
);
}
void
MKLDNNLayer
::
resetOutGrad
(
MKLDNNMatrixPtr
&
out
,
memory
::
primitive_desc
intPD
)
{
cvtOutGrad_
=
nullptr
;
extOutGrad_
=
nullptr
;
out
=
nullptr
;
MatrixPtr
&
outMat
=
output_
.
grad
;
out
=
MKLDNNMatrix
::
create
(
intPD
,
outMat
);
resetMergeGrad
(
out
);
if
(
outputIsOnlyMKLDNN
())
{
return
;
}
CHECK_LE
(
outputMap_
.
size
(),
1U
)
<<
"do not support mixed with cpu device"
;
extOutGrad_
=
out
;
if
(
isPaddleFormat
(
extOutGrad_
->
getFormat
()))
{
return
;
}
// need create reorder
CHECK
(
extOutVal_
!=
nullptr
&&
isPaddleFormat
(
extOutVal_
->
getFormat
()))
<<
"should have external output value and the format must be nchw(nc)"
;
extOutGrad_
=
MKLDNNMatrix
::
create
(
extOutVal_
->
getPrimitiveDesc
(),
outMat
);
CHECK
(
outVal_
!=
nullptr
&&
outVal_
->
getPrimitiveDesc
()
==
intPD
)
<<
"should have internal output value and primitive desc must equal"
;
out
=
MKLDNNMatrix
::
create
(
intPD
);
cvtOutGrad_
=
MKLDNNMatrix
::
createReorder
(
extOutGrad_
,
out
);
CHECK
(
cvtOutGrad_
);
}
void
MKLDNNLayer
::
resetMergeGrad
(
MKLDNNMatrixPtr
&
out
)
{
mergeGrad_
=
nullptr
;
pipelineMergeGrad_
.
clear
();
if
(
outputMap_
.
size
()
<=
1
||
!
outputIsOnlyMKLDNN
())
{
// do not merge when output is not all MKLDNN or only one output
return
;
}
CHECK
(
out
)
<<
"should have reset internal ouput grad"
;
std
::
vector
<
double
>
scales
(
outputMap_
.
size
(),
1.0
);
std
::
vector
<
memory
::
primitive_desc
>
srcPDs
;
std
::
vector
<
primitive
::
at
>
srcs
;
for
(
auto
it
=
outputMap_
.
begin
();
it
!=
outputMap_
.
end
();
++
it
)
{
MKLDNNMatrixPtr
src
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
it
->
second
->
grad
);
CHECK
(
src
)
<<
"should be MKLDNNMatrix"
;
auto
srcDims
=
src
->
getDims
();
auto
dstDims
=
out
->
getDims
();
CHECK_EQ
(
srcDims
.
size
(),
dstDims
.
size
());
for
(
size_t
i
=
0
;
i
<
srcDims
.
size
();
++
i
)
{
CHECK_EQ
(
srcDims
[
i
],
dstDims
[
i
]);
}
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" has output grad "
<<
it
->
first
<<
", format "
<<
src
->
getFormat
();
srcPDs
.
push_back
(
src
->
getPrimitiveDesc
());
srcs
.
push_back
(
*
src
);
}
// TODO(TJ): remove me when mkldnn sum support different formats
for
(
size_t
i
=
1
;
i
<
srcPDs
.
size
();
++
i
)
{
CHECK
(
srcPDs
[
0
]
==
srcPDs
[
i
]);
}
tmpOutGrad_
=
out
;
tmpCvt_
=
nullptr
;
if
(
out
->
getPrimitiveDesc
()
!=
srcPDs
[
0
])
{
tmpOutGrad_
=
MKLDNNMatrix
::
create
(
srcPDs
[
0
]);
tmpCvt_
=
MKLDNNMatrix
::
createReorder
(
tmpOutGrad_
,
out
);
CHECK
(
tmpCvt_
);
pipelineMergeGrad_
.
push_back
(
*
tmpCvt_
);
}
auto
sumPD
=
sum
::
primitive_desc
(
tmpOutGrad_
->
getMemoryDesc
(),
scales
,
srcPDs
);
mergeGrad_
.
reset
(
new
sum
(
sumPD
,
srcs
,
*
tmpOutGrad_
));
pipelineMergeGrad_
.
insert
(
pipelineMergeGrad_
.
begin
(),
*
mergeGrad_
);
}
}
// namespace paddle
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
abce9eb7
...
...
@@ -58,11 +58,31 @@ protected:
std
::
vector
<
mkldnn
::
primitive
>
pipelineFwd_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineBwd_
;
// MKLDNNMatrixPtr with internal format
/* Value and grad are seperated as internal and external buffers.
* Each MKLDNNLayer must init or reset internal buffer at least,
* and the external buffer format is always nchw of nc(when h==w==1),
* which is the same format as paddle.
* The output_.value and output_.grad always save the external data,
* when mixed with cpu device.
* When all layers are mkldnn layers, they could save internal data.
*/
// below MKLDNNMatrix buffers are all internal buffers
MKLDNNMatrixPtr
inVal_
;
MKLDNNMatrixPtr
inGrad_
;
MKLDNNMatrixPtr
outVal_
;
MKLDNNMatrixPtr
outGrad_
;
// below are external value and grad
MKLDNNMatrixPtr
extInVal_
;
MKLDNNMatrixPtr
extInGrad_
;
MKLDNNMatrixPtr
extOutVal_
;
MKLDNNMatrixPtr
extOutGrad_
;
// convert handle between external and internal buffers
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInGrad_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutGrad_
;
// weight and bias are always internal buffers
MKLDNNMatrixPtr
wgtVal_
;
MKLDNNMatrixPtr
wgtGrad_
;
MKLDNNMatrixPtr
biasVal_
;
...
...
@@ -91,6 +111,7 @@ public:
oh_
(
0
),
ow_
(
0
),
needResetBwd_
(
true
),
outputOnlyMKLDNN_
(
false
),
engine_
(
mkldnn
::
engine
::
cpu
,
0
),
stream_
(
nullptr
),
fwd_
(
nullptr
),
...
...
@@ -99,92 +120,9 @@ public:
~
MKLDNNLayer
()
{}
virtual
bool
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
<<
"Please set WITH_MKLDNN=ON "
<<
"and set use_mkldnn=True"
;
CHECK
(
!
useGpu_
)
<<
"Do not support GPU yet"
;
// set device id before Layer::init
setDevice
(
MKLDNN_DEVICE
);
// change param device to MKLDNN device
setParamsDevice
(
MKLDNN_DEVICE
,
parameterMap
);
if
(
!
Layer
::
init
(
layerMap
,
parameterMap
))
{
return
false
;
}
setOutputMap
();
checkCPUOutputsNumber
();
stream_
.
reset
(
new
MKLDNNStream
());
engine_
=
CPUEngine
::
Instance
().
getEngine
();
return
true
;
}
void
forward
(
PassType
passType
)
override
{
passType_
=
passType
;
{
REGISTER_TIMER_INFO
(
"mkldnn_FwdTimer"
,
getName
().
c_str
());
CHECK
(
!
inputLayers_
.
empty
());
copySeqInfoToOutputs
();
size_t
elemenCnt
=
inputLayers_
[
0
]
->
getOutput
().
value
->
getElementCnt
();
if
(
inputElemenCnt_
!=
elemenCnt
)
{
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn forward"
;
// reset when input total sizes changed, not only the batchsize
inputElemenCnt_
=
elemenCnt
;
pipelineFwd_
.
clear
();
reshape
(
bs_
,
ic_
,
ih_
,
iw_
,
oc_
,
oh_
,
ow_
);
resetFwd
(
pipelineFwd_
,
inVal_
,
wgtVal_
,
biasVal_
,
outVal_
);
convertWeightsFromPaddle
();
needResetBwd_
=
true
;
}
if
(
inputLayers_
[
0
]
->
getType
()
==
"data"
)
{
updateInputData
();
}
if
(
!
outputOnlyMKLDNN_
)
{
clearGrads
();
}
stream_
->
submit
(
pipelineFwd_
);
}
/* activation */
{
REGISTER_TIMER_INFO
(
"FwActTimer"
,
getName
().
c_str
());
forwardActivation
();
}
}
void
backward
(
const
UpdateCallback
&
callback
)
override
{
if
(
needResetBwd_
)
{
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn backward"
;
pipelineBwd_
.
clear
();
pipelineMergeGrad_
.
clear
();
mergeGrad_
=
nullptr
;
resetBwd
(
pipelineBwd_
,
inGrad_
,
wgtGrad_
,
biasGrad_
,
outGrad_
);
needResetBwd_
=
false
;
}
// merge grad must before backward activation
if
(
mergeGrad_
)
{
REGISTER_TIMER_INFO
(
"MergeBpGrad"
,
getName
().
c_str
());
stream_
->
submit
(
pipelineMergeGrad_
);
}
{
REGISTER_TIMER_INFO
(
"BpActTimer"
,
getName
().
c_str
());
backwardActivation
();
}
{
REGISTER_TIMER_INFO
(
"mkldnn_bwdTimer"
,
getName
().
c_str
());
stream_
->
submit
(
pipelineBwd_
);
}
{
REGISTER_TIMER_INFO
(
"WeightUpdate"
,
getName
().
c_str
());
updateWeights
(
callback
);
}
}
virtual
bool
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
);
virtual
void
forward
(
PassType
passType
);
virtual
void
backward
(
const
UpdateCallback
&
callback
);
/**
* reshape the input image sizes
...
...
@@ -195,7 +133,7 @@ public:
int
&
bs
,
int
&
ic
,
int
&
ih
,
int
&
iw
,
int
oc
,
int
&
oh
,
int
&
ow
)
=
0
;
/**
* reset the mkldnn forward primitve and memor
y
* reset the mkldnn forward primitve and memor
ies
* only would be called when input size changes
*/
virtual
void
resetFwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
...
...
@@ -205,7 +143,7 @@ public:
MKLDNNMatrixPtr
&
out
)
=
0
;
/**
* reset the mkldnn backward primitve and memor
y for mkldnn fc
* reset the mkldnn backward primitve and memor
ies
* only would be called when needed
*/
virtual
void
resetBwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
...
...
@@ -214,12 +152,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
=
0
;
/**
* Update input value data when input layer is "data" type.
* Since the input value data address might be changed.
*/
virtual
void
updateInputData
()
{}
/**
* Update weights and biases if necessary.
*/
...
...
@@ -246,131 +178,78 @@ protected:
/**
* reshape the input image sizes and input batchsize
*/
virtual
void
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
)
{
const
Argument
&
input
=
inputLayers_
[
0
]
->
getOutput
();
batchsize
=
input
.
getBatchSize
();
int
h
=
input
.
getFrameHeight
();
int
w
=
input
.
getFrameWidth
();
if
(
h
!=
0
)
{
height
=
h
;
}
if
(
w
!=
0
)
{
width
=
w
;
}
}
void
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
);
/**
* reshape output image sizes
*/
virtual
void
reshapeOutput
(
size_t
height
,
size_t
width
)
{
output_
.
setFrameHeight
(
height
);
output_
.
setFrameWidth
(
width
);
for
(
size_t
i
=
0
;
i
<
outputOtherDevice_
.
size
();
i
++
)
{
outputOtherDevice_
[
i
].
setFrameHeight
(
height
);
outputOtherDevice_
[
i
].
setFrameWidth
(
width
);
}
}
void
reshapeOutput
(
size_t
height
,
size_t
width
);
/**
* reset the output grad matrix from primitive desc.
* and reset the merge grad primitive if needed.
* note: when this layer has serval outputs,
* it could not be mixed with cpu device,
* since it can not get memory desc from cpu device.
* reset MKLDNNMatrix from Matrix and internal primitive desc.
* reset nullptr if matrix or primitive desc is empty
*/
virtual
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
,
mkldnn
::
memory
::
primitive_desc
pd
)
{
CHECK
(
outputIsOnlyMKLDNN
())
<<
"do not support mixed with other device yet"
;
mergeGrad_
=
nullptr
;
pipelineMergeGrad_
.
clear
();
out
=
MKLDNNMatrix
::
create
(
output_
.
grad
,
pd
);
if
(
outputMap_
.
size
()
<=
1
)
{
return
;
}
std
::
vector
<
double
>
scales
(
outputMap_
.
size
(),
1.0
);
std
::
vector
<
mkldnn
::
memory
::
primitive_desc
>
srcPDs
;
std
::
vector
<
mkldnn
::
primitive
::
at
>
srcs
;
for
(
auto
it
=
outputMap_
.
begin
();
it
!=
outputMap_
.
end
();
++
it
)
{
MKLDNNMatrixPtr
src
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
it
->
second
->
grad
);
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" has output grad "
<<
it
->
first
;
CHECK
(
src
)
<<
"should be MKLDNNMatrix"
;
auto
srcDims
=
src
->
getDims
();
auto
dstDims
=
out
->
getDims
();
CHECK_EQ
(
srcDims
.
size
(),
dstDims
.
size
());
for
(
size_t
i
=
0
;
i
<
srcDims
.
size
();
++
i
)
{
CHECK_EQ
(
srcDims
[
i
],
dstDims
[
i
]);
}
srcPDs
.
push_back
(
src
->
getPrimitiveDesc
());
srcs
.
push_back
(
*
src
);
}
// TODO(TJ): remove me when mkldnn sum support different formats
for
(
size_t
i
=
1
;
i
<
srcPDs
.
size
();
++
i
)
{
CHECK
(
srcPDs
[
0
]
==
srcPDs
[
i
]);
}
tmpOutGrad_
=
nullptr
;
tmpCvt_
=
nullptr
;
if
(
out
->
getPrimitiveDesc
()
!=
srcPDs
[
0
])
{
tmpOutGrad_
=
MKLDNNMatrix
::
create
(
nullptr
,
srcPDs
[
0
]);
tmpCvt_
=
MKLDNNMatrix
::
createReorder
(
tmpOutGrad_
,
out
);
CHECK
(
tmpCvt_
);
pipelineMergeGrad_
.
push_back
(
*
tmpCvt_
);
}
else
{
tmpOutGrad_
=
out
;
}
void
resetWithMatrix
(
MKLDNNMatrixPtr
&
dnn
,
const
MatrixPtr
&
mat
,
mkldnn
::
memory
::
primitive_desc
pd
);
auto
sumPD
=
mkldnn
::
sum
::
primitive_desc
(
tmpOutGrad_
->
getMemoryDesc
(),
scales
,
srcPDs
);
mergeGrad_
.
reset
(
new
mkldnn
::
sum
(
sumPD
,
srcs
,
*
tmpOutGrad_
));
pipelineMergeGrad_
.
insert
(
pipelineMergeGrad_
.
begin
(),
*
mergeGrad_
);
}
/**
* reset input value from input MKLDNNMatrix and internal primitive desc.
* reset both internal and external buffer and create reorder if necessary.
*/
void
resetInValue
(
MKLDNNMatrixPtr
&
in
,
const
std
::
shared_ptr
<
mkldnn
::
memory
::
primitive_desc
>&
intPD
=
nullptr
);
/**
* reset input grad from primitive desc.
* this function is avaiable for input is only mkldnn
* or input do not care cpu device
* reset output value from internal primitive desc.
* reset both internal and external buffer and create reorder if necessary.
*/
virtual
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
,
mkldnn
::
memory
::
primitive_desc
pd
)
{
LayerPtr
&
input
=
inputLayers_
[
0
];
const
MatrixPtr
&
grad
=
input
->
getOutputMapSize
()
>
1
?
nullptr
:
input
->
getOutput
().
grad
;
in
=
MKLDNNMatrix
::
create
(
grad
,
pd
);
Argument
&
arg
=
input
->
getOutput
(
this
->
getName
());
arg
.
grad
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
in
);
}
void
resetOutValue
(
MKLDNNMatrixPtr
&
out
,
mkldnn
::
memory
::
primitive_desc
intPD
);
/**
* print info about sizes
* reset input grad from internal primitive desc.
* reset both internal and external buffer and create reorder if necessary.
*/
virtual
void
printSizeInfo
()
{
VLOG
(
MKLDNN_SIZES
)
<<
getName
()
<<
": bs: "
<<
bs_
<<
", ic: "
<<
ic_
<<
", ih: "
<<
ih_
<<
", iw: "
<<
iw_
<<
", oc: "
<<
oc_
<<
", oh: "
<<
oh_
<<
", ow: "
<<
ow_
;
}
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
,
mkldnn
::
memory
::
primitive_desc
intPD
);
/**
* Print the mkldnn memory format flow of value
* reset output grad from internal primitive desc.
* merge grad if necessary.
* reset both internal and external buffer and create reorder if necessary.
* note: about merge grad, when this layer has several outputs,
* it could not be mixed with cpu device,
* since it can not get memory desc from cpu device.
*/
virtual
void
printValueFormatFlow
()
{
if
(
inVal_
&&
outVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
inVal_
->
getFormat
()
<<
" >>> "
<<
outVal_
->
getFormat
();
}
}
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
,
mkldnn
::
memory
::
primitive_desc
intPD
);
/**
* reset the merge grad primitive if necessary.
* note: do not support the grads mixed with cpu device,
* since it can not get memory desc from cpu device.
*/
void
resetMergeGrad
(
MKLDNNMatrixPtr
&
out
);
protected:
/**
*
Print the mkldnn memory format flow of grad
*
Set deviceId of this layer.
*/
virtual
void
printGradFormatFlow
()
{
if
(
inGrad_
&&
outGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
inGrad_
->
getFormat
()
<<
" <<< "
<<
outGrad_
->
getFormat
();
void
setDevice
(
int
id
)
{
deviceId_
=
id
;
}
/**
* check the format is nchw or nc,
* which is supported by Paddle default memory layout
*/
bool
isPaddleFormat
(
mkldnn
::
memory
::
format
fmt
)
{
if
(
fmt
==
mkldnn
::
memory
::
format
::
nchw
||
fmt
==
mkldnn
::
memory
::
format
::
nc
)
{
return
true
;
}
else
{
return
false
;
}
}
protected:
/**
* If input only has MKLDNN device.
* Otherwise, only support the previous layer using CPU device.
...
...
@@ -380,7 +259,6 @@ protected:
if
(
prevDevice
==
MKLDNN_DEVICE
)
{
return
true
;
}
else
{
// do not support GPU yet
CHECK_EQ
(
prevDevice
,
CPU_DEVICE
)
<<
"Only support CPU yet"
;
return
false
;
}
...
...
@@ -400,9 +278,61 @@ protected:
}
/**
*
Set deviceId of this layer.
*
print info about sizes
*/
void
setDevice
(
int
id
)
{
deviceId_
=
id
;
}
virtual
void
printSizeInfo
()
{
VLOG
(
MKLDNN_SIZES
)
<<
getName
()
<<
": bs: "
<<
bs_
<<
", ic: "
<<
ic_
<<
", ih: "
<<
ih_
<<
", iw: "
<<
iw_
<<
", oc: "
<<
oc_
<<
", oh: "
<<
oh_
<<
", ow: "
<<
ow_
;
}
/**
* print the mkldnn memory format of value
*/
virtual
void
printValueFormat
()
{
if
(
extInVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
extInVal_
->
getFormat
()
<<
" >>> "
;
}
if
(
inVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
inVal_
->
getFormat
()
<<
" >>>"
;
}
if
(
outVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
outVal_
->
getFormat
()
<<
" >>> "
;
}
if
(
extOutVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
extOutVal_
->
getFormat
();
}
if
(
wgtVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"Weight value format: "
<<
wgtVal_
->
getFormat
();
}
if
(
biasVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"Bias value format: "
<<
biasVal_
->
getFormat
();
}
}
/**
* print the mkldnn memory format of grad
*/
virtual
void
printGradFormat
()
{
if
(
extOutGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
extOutGrad_
->
getFormat
();
}
if
(
outGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
outGrad_
->
getFormat
()
<<
" <<< "
;
}
if
(
inGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
inGrad_
->
getFormat
()
<<
" <<<"
;
}
if
(
extInGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
extInGrad_
->
getFormat
()
<<
" <<< "
;
}
if
(
wgtGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"Weight grad format: "
<<
wgtGrad_
->
getFormat
();
}
if
(
biasGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"Bias grad format: "
<<
biasGrad_
->
getFormat
();
}
}
private:
/**
...
...
@@ -449,6 +379,19 @@ private:
}
}
/**
* if have cpu device, share value and grad data with output_
*/
void
shareCPUDevice
()
{
if
(
outputIsOnlyMKLDNN
())
{
return
;
}
for
(
size_t
i
=
0
;
i
<
outputOtherDevice_
.
size
();
i
++
)
{
outputOtherDevice_
[
i
].
value
=
output_
.
value
;
outputOtherDevice_
[
i
].
grad
=
output_
.
grad
;
}
}
/**
* Check the cpu device number of outputOtherDevice_.
* should have only one at most.
...
...
paddle/gserver/layers/MKLDNNPoolLayer.cpp
浏览文件 @
abce9eb7
...
...
@@ -85,8 +85,6 @@ void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdPD
(
fwdPD_
,
in
,
out
);
resetFwdPipeline
(
pipeline
,
fwdPD_
,
in
,
out
);
printValueFormatFlow
();
}
void
MKLDNNPoolLayer
::
resetBwd
(
std
::
vector
<
primitive
>&
pipeline
,
...
...
@@ -101,65 +99,22 @@ void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdPD
(
pd
,
in
,
out
);
resetBwdPipeline
(
pipeline
,
pd
,
in
,
out
);
printGradFormatFlow
();
}
void
MKLDNNPoolLayer
::
updateInputData
()
{
inVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
void
MKLDNNPoolLayer
::
resetFwdBuffers
(
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
resetInValue
(
in
);
resetOutValue
(
out
);
}
void
MKLDNNPoolLayer
::
resetInValue
(
MKLDNNMatrixPtr
&
in
)
{
if
(
inputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
dnnIn
=
getInputValue
(
0
);
in
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
dnnIn
);
CHECK
(
in
)
<<
"Input should be MKLDNNMatrix"
;
}
else
{
CHECK_EQ
(
getPrev
(
0
)
->
getDeviceId
(),
CPU_DEVICE
)
<<
"Only support CPU yet"
;
const
MatrixPtr
&
cpuIn
=
getInputValue
(
0
,
CPU_DEVICE
);
in
=
MKLDNNMatrix
::
create
(
cpuIn
,
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
}
}
void
MKLDNNPoolLayer
::
resetOutValue
(
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
inVal_
)
<<
"Should reset input value first"
;
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
outDims
,
inVal_
->
getFormat
(),
engine_
);
// create reorder if output value has cpu device and pd do not match
cpuOutVal_
=
nullptr
;
cvtOutVal_
=
nullptr
;
if
(
!
outputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
value
;
cpuOutVal_
=
MKLDNNMatrix
::
create
(
cpuOut
,
outDims
,
format
::
nchw
,
engine_
);
if
(
cpuOutVal_
->
getPrimitiveDesc
()
!=
out
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
out
->
getPrimitiveDesc
());
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
cpuOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be emptry"
;
}
else
{
cpuOut
->
setData
(
output_
.
value
->
getData
());
cpuOutVal_
=
out
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
cpuOutVal_
);
return
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
outVal_
);
CHECK
(
in
);
auto
outPD
=
MKLDNNMatrix
::
createPrimitiveDesc
(
outDims
,
in
->
getFormat
(),
engine_
);
resetOutValue
(
out
,
outPD
);
}
void
MKLDNNPoolLayer
::
resetFwdPD
(
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
in
,
MKLDNNMatrixPtr
out
)
{
memory
::
dims
inDims
=
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
};
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
memory
::
dims
kernels
=
memory
::
dims
{
fh_
,
fw_
};
memory
::
dims
strides
=
memory
::
dims
{
sh_
,
sw_
};
memory
::
dims
padL
=
memory
::
dims
{
ph_
,
pw_
};
...
...
@@ -194,58 +149,26 @@ void MKLDNNPoolLayer::resetFwdPipeline(
?
std
::
make_shared
<
pool_fwd
>
(
pool_fwd
(
*
pd
,
*
in
,
*
out
,
*
workspace_
))
:
std
::
make_shared
<
pool_fwd
>
(
pool_fwd
(
*
pd
,
*
in
,
*
out
));
pipeline
.
push_back
(
*
fwd_
);
if
(
cvtOutVal_
)
{
pipeline
.
push_back
(
*
cvtOutVal_
);
}
}
void
MKLDNNPoolLayer
::
resetBwdBuffers
(
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
resetOutGrad
(
out
);
resetInGrad
(
in
);
}
void
MKLDNNPoolLayer
::
resetOutGrad
(
MKLDNNMatrixPtr
&
out
)
{
cpuOutGrad_
=
nullptr
;
cvtOutGrad_
=
nullptr
;
CHECK
(
outVal_
);
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
// always share the same grad data of CPU output
// then the activation can get the right grad from output_.grad
output_
.
grad
->
setData
(
cpuOut
->
getData
());
cpuOutGrad_
=
MKLDNNMatrix
::
create
(
cpuOut
,
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
},
format
::
nchw
,
engine_
);
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
!=
outVal_
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
outVal_
->
getPrimitiveDesc
());
cvtOutGrad_
=
MKLDNNMatrix
::
createReorder
(
cpuOutGrad_
,
out
);
CHECK
(
cvtOutGrad_
)
<<
"should not be emptry"
;
}
else
{
out
=
cpuOutGrad_
;
}
}
}
void
MKLDNNPoolLayer
::
resetInGrad
(
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
if
(
inputLayers_
[
0
]
->
getOutput
().
grad
==
nullptr
)
{
return
;
}
CHECK
(
inVal_
);
MKLDNNLayer
::
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
CHECK
(
inVal_
&&
outVal_
);
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
}
void
MKLDNNPoolLayer
::
resetBwdPD
(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
pd
=
nullptr
;
if
(
in
==
nullptr
)
{
return
;
}
memory
::
dims
kernels
=
memory
::
dims
{
fh_
,
fw_
};
memory
::
dims
strides
=
memory
::
dims
{
sh_
,
sw_
};
memory
::
dims
padL
=
memory
::
dims
{
ph_
,
pw_
};
memory
::
dims
padR
=
getPaddingR
();
CHECK
(
in
);
CHECK
(
out
);
auto
bwdDesc
=
pool_bwd
::
desc
(
poolAlgo_
,
in
->
getMemoryDesc
(),
...
...
@@ -263,8 +186,8 @@ void MKLDNNPoolLayer::resetBwdPipeline(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
if
(
cvtOutGrad_
)
{
pipeline
.
push_back
(
*
cvtOutGrad_
)
;
if
(
pd
==
nullptr
)
{
return
;
}
bwdData_
=
...
...
paddle/gserver/layers/MKLDNNPoolLayer.h
浏览文件 @
abce9eb7
...
...
@@ -38,13 +38,6 @@ protected:
// pooling_avg or pooling_max
mkldnn
::
algorithm
poolAlgo_
;
// MKLDNNMatrixPtr which should be created from CPU Device
MKLDNNMatrixPtr
cpuOutVal_
;
MKLDNNMatrixPtr
cpuOutGrad_
;
// convert handle between CPU device and MKLDNN device
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutGrad_
;
// save forward primitive_desc, which can be used backward
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>
fwdPD_
;
// according to https://github.com/01org/mkl-dnn/blob/master/tests/gtests/
...
...
@@ -74,8 +67,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
updateInputData
()
override
;
void
printSizeInfo
()
override
{
MKLDNNLayer
::
printSizeInfo
();
VLOG
(
MKLDNN_SIZES
)
<<
getName
()
<<
": fh: "
<<
fh_
<<
", fw: "
<<
fw_
...
...
@@ -90,8 +81,6 @@ protected:
* reset pipeline.
*/
void
resetFwdBuffers
(
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
);
void
resetInValue
(
MKLDNNMatrixPtr
&
in
);
void
resetOutValue
(
MKLDNNMatrixPtr
&
out
);
void
resetFwdPD
(
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
in
,
MKLDNNMatrixPtr
out
);
...
...
@@ -106,8 +95,6 @@ protected:
* reset pipeline.
*/
void
resetBwdBuffers
(
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
);
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
);
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
);
void
resetBwdPD
(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
);
...
...
paddle/gserver/tests/MKLDNNTester.cpp
浏览文件 @
abce9eb7
...
...
@@ -97,7 +97,7 @@ void MKLDNNTester::randomWgtDatas() {
parameters_
[
REF
][
i
]
->
randomize
();
dnnValue
->
copyFrom
(
*
refValue
);
VLOG
(
lvl_
)
<<
"Random weight data
"
<<
parameters_
[
DNN
][
i
]
->
getName
();
VLOG
(
MKLDNN_TESTS
)
<<
"Random weight
"
<<
parameters_
[
DNN
][
i
]
->
getName
();
printVector
(
dnnValue
);
}
}
...
...
@@ -109,7 +109,7 @@ void MKLDNNTester::randomBotDatas() {
dataLayers_
[
REF
][
i
]
->
getOutputValue
()
->
randomizeUniform
();
dataLayers_
[
DNN
][
i
]
->
getOutputValue
()
->
copyFrom
(
*
(
dataLayers_
[
REF
][
i
]
->
getOutputValue
()));
VLOG
(
lvl_
)
<<
"Input "
<<
i
<<
" data:"
;
VLOG
(
MKLDNN_TESTS
)
<<
"Random Foward, InputValue "
<<
i
;
printMatrix
(
dataLayers_
[
REF
][
i
]
->
getOutputValue
());
}
}
...
...
@@ -118,12 +118,12 @@ void MKLDNNTester::randomTopDiffs() {
refLayer_
->
getOutputGrad
()
->
randomizeUniform
();
dnnLayer_
->
getOutput
(
CPU_DEVICE
)
.
grad
->
copyFrom
(
*
(
refLayer_
->
getOutputGrad
()));
VLOG
(
lvl_
)
<<
"Random Backward Input, TopDiff:
"
;
VLOG
(
MKLDNN_TESTS
)
<<
"Random Backward, OutputGrad
"
;
printMatrix
(
refLayer_
->
getOutputGrad
());
}
void
MKLDNNTester
::
checkForward
()
{
VLOG
(
MKLDNN_
ALL
)
<<
"Check Forward"
;
VLOG
(
MKLDNN_
TESTS
)
<<
"Check Forward"
;
printTopDatas
();
double
delta
=
compareMatrix
(
dnnLayer_
->
getOutputValue
(),
refLayer_
->
getOutputValue
());
...
...
@@ -131,15 +131,15 @@ void MKLDNNTester::checkForward() {
}
void
MKLDNNTester
::
checkBackwardData
()
{
VLOG
(
MKLDNN_
ALL
)
<<
"Check Backward Data"
;
VLOG
(
MKLDNN_
TESTS
)
<<
"Check Backward Data"
;
// TODO(TJ): uncomment me when batch norm ready
// const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm";
for
(
size_t
i
=
0
;
i
<
dataLayers_
[
DNN
].
size
();
++
i
)
{
const
MatrixPtr
&
dnnDiff
=
dataLayers_
[
DNN
][
i
]
->
getOutputGrad
();
const
MatrixPtr
&
refDiff
=
dataLayers_
[
REF
][
i
]
->
getOutputGrad
();
VLOG
(
lvl_
)
<<
"Mkldnn Backward Output BotDiff
"
<<
i
;
VLOG
(
MKLDNN_ALL
)
<<
"MKLDNN Backward Result: InputGrad
"
<<
i
;
printMatrix
(
dnnDiff
);
VLOG
(
lvl_
)
<<
"Reference Backward Output BotDiff
"
<<
i
;
VLOG
(
MKLDNN_ALL
)
<<
"Reference Backward Result: InputGrad
"
<<
i
;
printMatrix
(
refDiff
);
double
delta
=
compareMatrix
(
dnnDiff
,
refDiff
);
...
...
@@ -153,7 +153,7 @@ void MKLDNNTester::checkBackwardData() {
}
void
MKLDNNTester
::
checkBackwardWgts
()
{
VLOG
(
MKLDNN_
ALL
)
<<
"Check Backward Weight"
;
VLOG
(
MKLDNN_
TESTS
)
<<
"Check Backward Weight"
;
CHECK_EQ
(
parameters_
[
DNN
].
size
(),
parameters_
[
REF
].
size
());
vector
<
VectorPtr
>
dnnWgts
;
// used to temply save mkldnn weights
saveWgt
(
parameters_
[
DNN
],
dnnWgts
);
...
...
@@ -165,9 +165,11 @@ void MKLDNNTester::checkBackwardWgts() {
for
(
size_t
i
=
0
;
i
<
parameters_
[
DNN
].
size
();
++
i
)
{
const
VectorPtr
&
dnn
=
parameters_
[
DNN
][
i
]
->
getBuf
(
PARAMETER_VALUE
);
const
VectorPtr
&
ref
=
parameters_
[
REF
][
i
]
->
getBuf
(
PARAMETER_VALUE
);
VLOG
(
lvl_
)
<<
"Mkldnn Output weight "
<<
parameters_
[
DNN
][
i
]
->
getName
();
VLOG
(
MKLDNN_ALL
)
<<
"MKLDNN Result: weight value"
<<
parameters_
[
DNN
][
i
]
->
getName
();
printVector
(
dnn
);
VLOG
(
lvl_
)
<<
"Reference Output weight "
<<
parameters_
[
REF
][
i
]
->
getName
();
VLOG
(
MKLDNN_ALL
)
<<
"Reference Result: weight value "
<<
parameters_
[
REF
][
i
]
->
getName
();
printVector
(
ref
);
double
delta
=
compareVector
(
dnn
,
ref
);
...
...
@@ -240,7 +242,8 @@ void MKLDNNTester::printTopDatas() {
}
for
(
int
n
=
0
;
n
<
NUM
;
++
n
)
{
VLOG
(
lvl_
)
<<
testLayers_
[
n
]
->
getType
()
<<
" forward output TopData: "
;
VLOG
(
MKLDNN_ALL
)
<<
testLayers_
[
n
]
->
getType
()
<<
" Forward Result: OutputValue"
;
printMatrix
(
testLayers_
[
n
]
->
getOutputValue
());
}
}
...
...
@@ -252,7 +255,7 @@ void MKLDNNTester::printMatrix(const MatrixPtr& m) {
std
::
ostringstream
ostr
;
m
->
print
(
ostr
);
VLOG
(
lvl_
)
<<
std
::
endl
<<
ostr
.
str
();
VLOG
(
MKLDNN_ALL
)
<<
std
::
endl
<<
ostr
.
str
();
}
void
MKLDNNTester
::
printVector
(
const
VectorPtr
&
v
)
{
...
...
@@ -262,7 +265,7 @@ void MKLDNNTester::printVector(const VectorPtr& v) {
std
::
ostringstream
ostr
;
v
->
print
(
ostr
,
v
->
getSize
());
VLOG
(
lvl_
)
<<
std
::
endl
<<
ostr
.
str
();
VLOG
(
MKLDNN_ALL
)
<<
std
::
endl
<<
ostr
.
str
();
}
double
MKLDNNTester
::
getDelta
(
const
real
*
d1
,
...
...
@@ -314,7 +317,7 @@ void MKLDNNTester::runOnce() {
UpdateCallback
updateCallback
=
[](
Parameter
*
para
)
{
auto
&
grad
=
para
->
getBuf
(
PARAMETER_GRADIENT
);
auto
&
value
=
para
->
getBuf
(
PARAMETER_VALUE
);
real
lr
=
1e-
3
;
real
lr
=
1e-
2
;
value
->
add
(
*
grad
,
lr
);
grad
->
zeroMem
();
};
...
...
@@ -340,10 +343,9 @@ void MKLDNNTester::run(const TestConfig& dnn,
size_t
batchSize
,
size_t
inputImgH
,
size_t
inputImgW
,
bool
printDetails
,
size_t
iter
,
float
epsilon
,
bool
log
,
int
level
)
{
float
epsilon
)
{
CHECK
(
dnn
.
layerConfig
.
type
().
compare
(
0
,
7
,
"mkldnn_"
)
==
0
||
dnn
.
layerConfig
.
active_type
().
compare
(
0
,
7
,
"mkldnn_"
)
==
0
)
<<
"should be MKLDNN layer or MKLDNN activation"
;
...
...
@@ -359,10 +361,9 @@ void MKLDNNTester::run(const TestConfig& dnn,
ih_
=
inputImgH
;
iw_
=
inputImgW
;
log_
=
printDetails
;
iter_
=
iter
;
eps_
=
epsilon
;
log_
=
log
;
lvl_
=
level
;
// Firstly test mkldnn init from PARAM_FORMAT_ORIGINAL weight
reset
(
dnn
,
ref
,
batchSize
);
...
...
@@ -531,9 +532,11 @@ void MKLDNNTester::getOutResult(const std::string& configPath,
void
MKLDNNTester
::
compareResult
(
DataOut
&
ref
,
DataOut
&
dnn
,
float
eps
)
{
CHECK_EQ
(
ref
.
outValues
.
size
(),
dnn
.
outValues
.
size
());
CHECK_EQ
(
ref
.
paraValues
.
size
(),
dnn
.
paraValues
.
size
());
VLOG
(
MKLDNN_TESTS
)
<<
"compare value size: "
<<
ref
.
outValues
.
size
();
for
(
size_t
i
=
0
;
i
<
ref
.
outValues
.
size
();
i
++
)
{
EXPECT_LE
(
fabs
(
compareMatrix
(
ref
.
outValues
[
i
],
dnn
.
outValues
[
i
])),
eps
);
}
VLOG
(
MKLDNN_TESTS
)
<<
"compare param size: "
<<
ref
.
outValues
.
size
();
for
(
size_t
i
=
0
;
i
<
ref
.
paraValues
.
size
();
i
++
)
{
EXPECT_LE
(
fabs
(
compareVector
(
ref
.
paraValues
[
i
],
dnn
.
paraValues
[
i
])),
eps
);
}
...
...
@@ -544,9 +547,10 @@ void MKLDNNTester::runBranchesTest(const std::string& configPath,
float
eps
)
{
DataIn
in
;
initArgument
(
in
,
configPath
,
iter
);
DataOut
outCpu
,
outDnn
;
VLOG
(
MKLDNN_TESTS
)
<<
"runing cpu network"
;
getOutResult
(
configPath
,
in
,
outCpu
,
false
,
iter
);
VLOG
(
MKLDNN_TESTS
)
<<
"runing mkldnn network"
;
getOutResult
(
configPath
,
in
,
outDnn
,
true
,
iter
);
compareResult
(
outCpu
,
outDnn
,
eps
);
...
...
paddle/gserver/tests/MKLDNNTester.h
浏览文件 @
abce9eb7
...
...
@@ -58,8 +58,6 @@ protected:
size_t
iter_
;
/// whether to print out the details
bool
log_
;
/// vlog level to print the matrix details datas
int
lvl_
;
/// epsilon
float
eps_
;
/// input image size, default 1
...
...
@@ -70,7 +68,6 @@ public:
iter_
=
iter
;
eps_
=
epsilon
;
log_
=
false
;
lvl_
=
MKLDNN_ALL
;
}
~
MKLDNNTester
()
{}
...
...
@@ -81,10 +78,9 @@ public:
size_t
batchSize
,
size_t
inputImgH
=
1
,
size_t
inputImgW
=
1
,
bool
printDetails
=
false
,
size_t
iter
=
3
,
float
epsilon
=
1e-4
,
bool
log
=
false
,
int
level
=
MKLDNN_ALL
);
float
epsilon
=
1e-4
);
static
void
runBranchesTest
(
const
std
::
string
&
configPath
,
size_t
iter
=
3
,
float
eps
=
1e-4
);
...
...
paddle/math/MKLDNNMatrix.cpp
浏览文件 @
abce9eb7
...
...
@@ -18,7 +18,7 @@ using namespace mkldnn; // NOLINT
namespace
paddle
{
MKLDNNMatrixPtr
MKLDNNMatrix
::
create
(
MatrixPtr
m
,
memory
::
primitive_desc
pd
)
{
MKLDNNMatrixPtr
MKLDNNMatrix
::
create
(
memory
::
primitive_desc
pd
,
MatrixPtr
m
)
{
memory
::
desc
md
=
pd
.
desc
();
size_t
ndims
=
md
.
data
.
ndims
;
int
*
dims
=
md
.
data
.
dims
;
...
...
@@ -41,12 +41,12 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
return
std
::
make_shared
<
MKLDNNMatrix
>
(
cpuMatrix
,
pd
);
}
MKLDNNMatrixPtr
MKLDNNMatrix
::
create
(
MatrixPtr
m
,
memory
::
dims
dims
,
MKLDNNMatrixPtr
MKLDNNMatrix
::
create
(
memory
::
dims
dims
,
memory
::
format
fmt
,
engine
&
eg
,
MatrixPtr
m
,
mkldnn
::
memory
::
data_type
dtype
)
{
return
create
(
m
,
memory
::
primitive_desc
(
memory
::
desc
(
dims
,
dtype
,
fmt
),
eg
)
);
return
create
(
createPrimitiveDesc
(
dims
,
fmt
,
eg
,
dtype
),
m
);
}
std
::
shared_ptr
<
reorder
>
MKLDNNMatrix
::
createReorder
(
const
MKLDNNMatrixPtr
&
src
,
...
...
paddle/math/MKLDNNMatrix.h
浏览文件 @
abce9eb7
...
...
@@ -40,24 +40,37 @@ public:
/**
* Create MKLDNNMatrix from a MatrixPtr and memory primitive_desc
*/
static
MKLDNNMatrixPtr
create
(
MatrixPtr
m
,
mkldnn
::
memory
::
primitive_desc
pd
);
static
MKLDNNMatrixPtr
create
(
mkldnn
::
memory
::
primitive_desc
pd
,
MatrixPtr
m
=
nullptr
);
/**
* Create MKLDNNMatrix from a MatrixPtr and memory details info
*/
static
MKLDNNMatrixPtr
create
(
MatrixPtr
m
,
mkldnn
::
memory
::
dims
dims
,
mkldnn
::
memory
::
format
fmt
,
mkldnn
::
engine
&
eg
,
MatrixPtr
m
=
nullptr
,
mkldnn
::
memory
::
data_type
dtype
=
mkldnn
::
memory
::
data_type
::
f32
);
/**
* Create primitive descriptor.
* default with f32 dtype
*/
static
mkldnn
::
memory
::
primitive_desc
createPrimitiveDesc
(
const
mkldnn
::
memory
::
dims
dims
,
const
mkldnn
::
memory
::
format
&
fmt
,
const
mkldnn
::
engine
&
eg
,
const
mkldnn
::
memory
::
data_type
&
dtype
=
mkldnn
::
memory
::
data_type
::
f32
)
{
return
mkldnn
::
memory
::
primitive_desc
(
memory
::
desc
(
dims
,
dtype
,
fmt
),
eg
);
}
/**
* Create Memory descriptor.
* default with any format and f32 dtype
*/
static
mkldnn
::
memory
::
desc
createMemoryDesc
(
const
mkldnn
::
memory
::
dims
&
dims
,
const
mkldnn
::
memory
::
dims
dims
,
const
mkldnn
::
memory
::
format
&
fmt
=
mkldnn
::
memory
::
format
::
any
,
const
mkldnn
::
memory
::
data_type
&
dtype
=
mkldnn
::
memory
::
data_type
::
f32
)
{
return
mkldnn
::
memory
::
desc
(
dims
,
dtype
,
fmt
);
...
...
paddle/trainer/tests/sample_trainer_config_branch_net.conf
浏览文件 @
abce9eb7
...
...
@@ -17,7 +17,7 @@ from paddle.trainer_config_helpers import *
################################### Data Configuration ###################################
TrainData
(
ProtoData
(
files
=
"trainer/tests/mnist.list"
))
################################### Algorithm Configuration ###################################
settings
(
batch_size
=
256
,
settings
(
batch_size
=
128
,
learning_method
=
MomentumOptimizer
(
momentum
=
0
.
5
,
sparse
=
False
))
################################### Network Configuration ###################################
data
=
data_layer
(
name
=
"input"
,
size
=
784
)
...
...
@@ -44,10 +44,11 @@ a2 = img_conv_layer(input=tmp,
shared_biases
=
True
,
act
=
ReluActivation
())
tmp
=
concat_layer
(
input
=[
a1
,
a2
])
tmp
=
addto_layer
(
input
=[
a1
,
a2
],
act
=
ReluActivation
(),
bias_attr
=
False
)
tmp
=
img_pool_layer
(
input
=
tmp
,
num_channels
=
64
,
pool_size
=
3
,
stride
=
2
,
padding
=
1
,
...
...
@@ -55,35 +56,34 @@ tmp = img_pool_layer(input=tmp,
b1
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
3
,
num_filters
=
64
,
num_filters
=
32
,
padding
=
1
,
shared_biases
=
True
,
act
=
ReluActivation
())
b1
=
img_pool_layer
(
input
=
b1
,
pool_size
=
3
,
stride
=
1
,
padding
=
1
,
stride
=
2
,
padding
=
0
,
pool_type
=
MaxPooling
())
b2
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
5
,
filter_size
=
3
,
num_filters
=
64
,
padding
=
2
,
padding
=
1
,
shared_biases
=
True
,
act
=
ReluActivation
())
b2
=
img_pool_layer
(
input
=
b2
,
pool_size
=
5
,
stride
=
1
,
padding
=
2
,
stride
=
2
,
padding
=
1
,
pool_type
=
MaxPooling
())
tmp
=
addto_layer
(
input
=[
b1
,
b2
],
act
=
ReluActivation
(),
bias_attr
=
False
)
tmp
=
concat_layer
(
input
=[
b1
,
b2
])
tmp
=
img_pool_layer
(
input
=
tmp
,
num_channels
=
96
,
pool_size
=
3
,
stride
=
2
,
padding
=
1
,
...
...
paddle/trainer/tests/sample_trainer_config_simple_net.conf
浏览文件 @
abce9eb7
...
...
@@ -17,7 +17,7 @@ from paddle.trainer_config_helpers import *
################################### Data Configuration ###################################
TrainData
(
ProtoData
(
files
=
"trainer/tests/mnist.list"
))
################################### Algorithm Configuration ###################################
settings
(
batch_size
=
1
000
,
settings
(
batch_size
=
1
28
,
learning_method
=
MomentumOptimizer
(
momentum
=
0
.
5
,
sparse
=
False
))
################################### Network Configuration ###################################
data
=
data_layer
(
name
=
"input"
,
size
=
784
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录