Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
17b4cea4
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
17b4cea4
编写于
10月 15, 2017
作者:
T
Tao Luo
提交者:
GitHub
10月 15, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4759 from tensor-tang/merge_grad
Merge mkldnn output grad
上级
e593113a
7a7c8fd9
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
285 addition
and
88 deletion
+285
-88
paddle/gserver/layers/Layer.h
paddle/gserver/layers/Layer.h
+6
-0
paddle/gserver/layers/MKLDNNConvLayer.cpp
paddle/gserver/layers/MKLDNNConvLayer.cpp
+37
-39
paddle/gserver/layers/MKLDNNFcLayer.cpp
paddle/gserver/layers/MKLDNNFcLayer.cpp
+11
-21
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+102
-4
paddle/gserver/layers/MKLDNNPoolLayer.cpp
paddle/gserver/layers/MKLDNNPoolLayer.cpp
+12
-13
paddle/gserver/tests/MKLDNNTester.cpp
paddle/gserver/tests/MKLDNNTester.cpp
+2
-2
paddle/trainer/tests/CMakeLists.txt
paddle/trainer/tests/CMakeLists.txt
+12
-9
paddle/trainer/tests/sample_trainer_config_branch_net.conf
paddle/trainer/tests/sample_trainer_config_branch_net.conf
+103
-0
未找到文件。
paddle/gserver/layers/Layer.h
浏览文件 @
17b4cea4
...
...
@@ -86,6 +86,7 @@ protected:
/// Also used in 'use_mkldnn' case.
std
::
vector
<
Argument
>
outputOtherDevice_
;
/// If there are several outputs, map them by each name.
/// MKLDNNLayer use it only to merge output grad
std
::
map
<
std
::
string
,
Argument
*>
outputMap_
;
/// Used to merge grad on different devices.
MatrixPtr
tmpGrad_
;
...
...
@@ -325,6 +326,11 @@ public:
outputMap_
[
name
]
=
output
;
}
/**
* Get the output map size, if layer has multi-output.
*/
size_t
getOutputMapSize
()
{
return
outputMap_
.
size
();
}
/**
* Get the output based on layer's name.
*/
...
...
paddle/gserver/layers/MKLDNNConvLayer.cpp
浏览文件 @
17b4cea4
...
...
@@ -225,8 +225,6 @@ void MKLDNNConvLayer::resetFwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
if
(
cvtInVal_
)
{
pipeline
.
push_back
(
*
cvtInVal_
);
}
...
...
@@ -245,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline(
void
MKLDNNConvLayer
::
resetInValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
)
{
const
MatrixPtr
&
inMat
=
inputLayers_
[
0
]
->
getOutput
().
value
;
const
MatrixPtr
&
inMat
=
inputLayers_
[
0
]
->
getOutput
Value
()
;
in
=
MKLDNNMatrix
::
create
(
inMat
,
pd
->
src_primitive_desc
());
// create buffer and reorder if input value do not match
...
...
@@ -310,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue(
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
value
;
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
cpuOutVal_
=
MKLDNNMatrix
::
create
(
cpuOut
,
outDims
,
format
::
nchw
,
engine_
);
if
(
cpuOutVal_
->
getPrimitiveDesc
()
!=
out
->
getPrimitiveDesc
())
{
if
(
cpuOutVal_
->
getPrimitiveDesc
()
!=
pd
->
dst_primitive_desc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
pd
->
dst_primitive_desc
());
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
cpuOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be empt
r
y"
;
CHECK
(
cvtOutVal_
)
<<
"should not be empty"
;
}
else
{
// CPU output share the same data of MKLDNN output
cpuOut
->
setData
(
out
->
getData
());
cpuOutVal_
=
out
;
}
// when output is cpu device, change the mkldnn output value and make them
// share the same data. Then if next layer use inputlayer->getOuputValue()
// to achieve the input value, it will get the right data.
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
cpuOutVal_
);
return
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
}
void
MKLDNNConvLayer
::
resetBwdWgtPD
(
...
...
@@ -412,8 +415,6 @@ void MKLDNNConvLayer::resetBwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
if
(
cvtOutGrad_
)
{
pipeline
.
push_back
(
*
cvtOutGrad_
);
}
...
...
@@ -446,28 +447,27 @@ void MKLDNNConvLayer::resetBwdPipeline(
void
MKLDNNConvLayer
::
resetOutGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
out
)
{
const
MatrixPtr
&
outMat
=
output_
.
grad
;
out
=
MKLDNNMatrix
::
create
(
outMat
,
wgtPD
->
diff_dst_primitive_desc
());
CHECK
(
outVal_
!=
nullptr
&&
out
->
getPrimitiveDesc
()
==
outVal_
->
getPrimitiveDesc
())
<<
"primitive desc of out grad and value should be equal"
;
// TODO(TJ): merge outgrad
// create reorder if has output grad does not match
cpuOutGrad_
=
nullptr
;
cvtOutGrad_
=
nullptr
;
if
(
!
outputIsOnlyMKLDNN
())
{
CHECK
(
outVal_
!=
nullptr
&&
outVal_
->
getPrimitiveDesc
()
==
wgtPD
->
diff_dst_primitive_desc
())
<<
"primitive desc of out grad and value should be equal"
;
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
outMat
->
setData
(
cpuOut
->
getData
());
// same PrimitiveDesc with cpuInVal_
CHECK
(
cpuOutVal_
);
cpuOutGrad_
=
MKLDNNMatrix
::
create
(
cpuOut
,
cpuOutVal_
->
getPrimitiveDesc
());
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
==
out
->
getPrimitiveDesc
())
{
out
=
cpuOutGrad_
;
}
else
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
wgtPD
->
diff_dst_primitive_desc
());
// create reorder if primitive desc does not match
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
!=
outVal_
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
output_
.
grad
,
outVal_
->
getPrimitiveDesc
());
cvtOutGrad_
=
MKLDNNMatrix
::
createReorder
(
cpuOutGrad_
,
out
);
CHECK
(
cvtOutGrad_
);
}
else
{
// share the same data of CPU output
output_
.
grad
->
setData
(
cpuOut
->
getData
());
out
=
cpuOutGrad_
;
}
}
}
...
...
@@ -496,32 +496,30 @@ void MKLDNNConvLayer::resetWgtBiasGrad(
void
MKLDNNConvLayer
::
resetInGrad
(
std
::
shared_ptr
<
conv_bwdData
::
primitive_desc
>&
dataPD
,
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
cpuInGrad_
=
nullptr
;
cvtInGrad_
=
nullptr
;
if
(
dataPD
==
nullptr
)
{
return
;
}
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
in
=
MKLDNNMatrix
::
create
(
inputLayers_
[
0
]
->
getOutput
().
grad
,
dataPD
->
diff_src_primitive_desc
());
CHECK
(
nullptr
!=
inVal_
&&
in
->
getPrimitiveDesc
()
==
inVal_
->
getPrimitiveDesc
())
<<
"primitive desc of input grad and value should be equal"
;
// create reorder if has output grad does not match
cpuInGrad_
=
nullptr
;
cvtInGrad_
=
nullptr
;
if
(
!
inputIsOnlyMKLDNN
())
{
if
(
inputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetInGrad
(
in
,
dataPD
->
diff_src_primitive_desc
());
CHECK
(
nullptr
!=
inVal_
&&
in
->
getPrimitiveDesc
()
==
inVal_
->
getPrimitiveDesc
())
<<
"primitive desc of input grad and value should be equal"
;
}
else
{
const
MatrixPtr
&
cpuIn
=
getInputGrad
(
0
,
CPU_DEVICE
);
// same PrimitiveDesc with cpuInVal_
CHECK
(
cpuInVal_
);
cpuInGrad_
=
MKLDNNMatrix
::
create
(
cpuIn
,
cpuInVal_
->
getPrimitiveDesc
());
if
(
cpuInGrad_
->
getPrimitiveDesc
()
!=
in
->
getPrimitiveDesc
())
{
const
MatrixPtr
&
dnnIn
=
getInputGrad
(
0
,
MKLDNN_DEVICE
);
in
=
MKLDNNMatrix
::
create
(
dnnIn
,
in
->
getPrimitiveDesc
());
in
=
cpuInGrad_
;
// create reorder if PrimitiveDesc does not match
if
(
cpuInGrad_
->
getPrimitiveDesc
()
!=
dataPD
->
diff_src_primitive_desc
())
{
in
=
MKLDNNMatrix
::
create
(
getInputGrad
(
0
,
MKLDNN_DEVICE
),
dataPD
->
diff_src_primitive_desc
());
cvtInGrad_
=
MKLDNNMatrix
::
createReorder
(
in
,
cpuInGrad_
);
CHECK
(
cvtInGrad_
);
}
else
{
in
=
cpuInGrad_
;
}
}
}
...
...
paddle/gserver/layers/MKLDNNFcLayer.cpp
浏览文件 @
17b4cea4
...
...
@@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
void
MKLDNNFcLayer
::
resetOutValue
(
MKLDNNMatrixPtr
&
out
)
{
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
if
(
!
outputIsOnlyMKLDNN
())
{
// fc cpu output value do not need create convert
// just share point
// fc cpu output value do not need create convert, just share data
getOutput
(
CPU_DEVICE
).
value
->
setData
(
out
->
getData
());
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
}
void
MKLDNNFcLayer
::
resetFwdPD
(
std
::
shared_ptr
<
fc_fwd
::
primitive_desc
>&
pd
,
...
...
@@ -214,8 +214,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
if
(
bias
)
{
fwd_
.
reset
(
new
fc_fwd
(
*
pd
,
*
in
,
*
wgt
,
*
bias
,
*
out
));
}
else
{
...
...
@@ -237,19 +235,14 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
}
void
MKLDNNFcLayer
::
resetOutGrad
(
MKLDNNMatrixPtr
&
out
)
{
// TODO(TJ): merge outgrad
int
device
=
outputIsOnlyMKLDNN
()
?
MKLDNN_DEVICE
:
CPU_DEVICE
;
output_
.
grad
->
setData
(
getOutput
(
device
).
grad
->
getData
());
// for MKLDNN device:
// can not directly cast outputgrad to mkldnnmatrix,
// since each layer can not write the inputgrad to mkldnn inputgrad.
// So just create from matrix with outputvalue format.
// for CPU device:
// fc do not need to convert from cpu device since output is always nc format
// only need create from cpu device
CHECK
(
outVal_
);
out
=
MKLDNNMatrix
::
create
(
getOutput
(
device
).
grad
,
outVal_
->
getPrimitiveDesc
());
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
output_
.
grad
->
setData
(
cpuOut
->
getData
());
out
=
MKLDNNMatrix
::
create
(
cpuOut
,
outVal_
->
getPrimitiveDesc
());
}
}
void
MKLDNNFcLayer
::
resetWgtBiasGrad
(
MKLDNNMatrixPtr
&
wgt
,
...
...
@@ -267,13 +260,11 @@ void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
void
MKLDNNFcLayer
::
resetInGrad
(
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
const
MatrixPtr
&
inGrad
=
inputLayers_
[
0
]
->
getOutput
().
grad
;
if
(
inGrad
==
nullptr
)
{
if
(
inputLayers_
[
0
]
->
getOutput
().
grad
==
nullptr
)
{
return
;
}
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
CHECK
(
inVal_
);
in
=
MKLDNNMatrix
::
create
(
inGrad
,
inVal_
->
getPrimitiveDesc
());
MKLDNNLayer
::
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
}
void
MKLDNNFcLayer
::
resetBwdWgtPD
(
...
...
@@ -314,7 +305,6 @@ void MKLDNNFcLayer::resetBwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
CHECK
(
inVal_
);
if
(
bias
)
{
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
*
bwdWgtPD
,
*
inVal_
,
*
out
,
*
wgt
,
*
bias
));
...
...
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
17b4cea4
...
...
@@ -65,6 +65,17 @@ protected:
MKLDNNMatrixPtr
biasVal_
;
MKLDNNMatrixPtr
biasGrad_
;
// merge grad primitive
std
::
shared_ptr
<
mkldnn
::
primitive
>
mergeGrad_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineMergeGrad_
;
// tmp input argument to save input grad, only used to merge grad
Argument
tmpInArg_
;
// since mkldnn sum do not support different formats:
// can refer to https://github.com/01org/mkl-dnn/issues/134
// so need create reorder manually and save tmp MKLDNNMatrix
MKLDNNMatrixPtr
tmpOutGrad_
;
std
::
shared_ptr
<
mkldnn
::
primitive
>
tmpCvt_
;
public:
explicit
MKLDNNLayer
(
const
LayerConfig
&
config
)
:
Layer
(
config
),
...
...
@@ -99,6 +110,7 @@ public:
if
(
!
Layer
::
init
(
layerMap
,
parameterMap
))
{
return
false
;
}
setOutputMap
();
checkCPUOutputsNumber
();
stream_
.
reset
(
new
MKLDNNStream
());
...
...
@@ -118,12 +130,9 @@ public:
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn forward"
;
// reset when input total sizes changed, not only the batchsize
inputElemenCnt_
=
elemenCnt
;
pipelineFwd_
.
clear
();
reshape
(
bs_
,
ic_
,
ih_
,
iw_
,
oc_
,
oh_
,
ow_
);
resetFwd
(
pipelineFwd_
,
inVal_
,
wgtVal_
,
biasVal_
,
outVal_
);
if
(
outVal_
)
{
// change original output value to mkldnn output value
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
outVal_
);
}
convertWeightsFromPaddle
();
needResetBwd_
=
true
;
}
...
...
@@ -144,9 +153,18 @@ public:
void
backward
(
const
UpdateCallback
&
callback
)
override
{
if
(
needResetBwd_
)
{
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn backward"
;
pipelineBwd_
.
clear
();
pipelineMergeGrad_
.
clear
();
mergeGrad_
=
nullptr
;
resetBwd
(
pipelineBwd_
,
inGrad_
,
wgtGrad_
,
biasGrad_
,
outGrad_
);
needResetBwd_
=
false
;
}
// merge grad must before backward activation
if
(
mergeGrad_
)
{
REGISTER_TIMER_INFO
(
"MergeBpGrad"
,
getName
().
c_str
());
stream_
->
submit
(
pipelineMergeGrad_
);
}
{
REGISTER_TIMER_INFO
(
"BpActTimer"
,
getName
().
c_str
());
backwardActivation
();
...
...
@@ -247,6 +265,76 @@ protected:
}
}
/**
* reset the output grad matrix from primitive desc.
* and reset the merge grad primitive if needed.
* note: when this layer has serval outputs,
* it could not be mixed with cpu device,
* since it can not get memory desc from cpu device.
*/
virtual
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
,
mkldnn
::
memory
::
primitive_desc
pd
)
{
CHECK
(
outputIsOnlyMKLDNN
())
<<
"do not support mixed with other device yet"
;
mergeGrad_
=
nullptr
;
pipelineMergeGrad_
.
clear
();
out
=
MKLDNNMatrix
::
create
(
output_
.
grad
,
pd
);
if
(
outputMap_
.
size
()
<=
1
)
{
return
;
}
std
::
vector
<
double
>
scales
(
outputMap_
.
size
(),
1.0
);
std
::
vector
<
mkldnn
::
memory
::
primitive_desc
>
srcPDs
;
std
::
vector
<
mkldnn
::
primitive
::
at
>
srcs
;
for
(
auto
it
=
outputMap_
.
begin
();
it
!=
outputMap_
.
end
();
++
it
)
{
MKLDNNMatrixPtr
src
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
it
->
second
->
grad
);
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" has output grad "
<<
it
->
first
;
CHECK
(
src
)
<<
"should be MKLDNNMatrix"
;
auto
srcDims
=
src
->
getDims
();
auto
dstDims
=
out
->
getDims
();
CHECK_EQ
(
srcDims
.
size
(),
dstDims
.
size
());
for
(
size_t
i
=
0
;
i
<
srcDims
.
size
();
++
i
)
{
CHECK_EQ
(
srcDims
[
i
],
dstDims
[
i
]);
}
srcPDs
.
push_back
(
src
->
getPrimitiveDesc
());
srcs
.
push_back
(
*
src
);
}
// TODO(TJ): remove me when mkldnn sum support different formats
for
(
size_t
i
=
1
;
i
<
srcPDs
.
size
();
++
i
)
{
CHECK
(
srcPDs
[
0
]
==
srcPDs
[
i
]);
}
tmpOutGrad_
=
nullptr
;
tmpCvt_
=
nullptr
;
if
(
out
->
getPrimitiveDesc
()
!=
srcPDs
[
0
])
{
tmpOutGrad_
=
MKLDNNMatrix
::
create
(
nullptr
,
srcPDs
[
0
]);
tmpCvt_
=
MKLDNNMatrix
::
createReorder
(
tmpOutGrad_
,
out
);
CHECK
(
tmpCvt_
);
pipelineMergeGrad_
.
push_back
(
*
tmpCvt_
);
}
else
{
tmpOutGrad_
=
out
;
}
auto
sumPD
=
mkldnn
::
sum
::
primitive_desc
(
tmpOutGrad_
->
getMemoryDesc
(),
scales
,
srcPDs
);
mergeGrad_
.
reset
(
new
mkldnn
::
sum
(
sumPD
,
srcs
,
*
tmpOutGrad_
));
pipelineMergeGrad_
.
insert
(
pipelineMergeGrad_
.
begin
(),
*
mergeGrad_
);
}
/**
* reset input grad from primitive desc.
* this function is avaiable for input is only mkldnn
* or input do not care cpu device
*/
virtual
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
,
mkldnn
::
memory
::
primitive_desc
pd
)
{
LayerPtr
&
input
=
inputLayers_
[
0
];
const
MatrixPtr
&
grad
=
input
->
getOutputMapSize
()
>
1
?
nullptr
:
input
->
getOutput
().
grad
;
in
=
MKLDNNMatrix
::
create
(
grad
,
pd
);
Argument
&
arg
=
input
->
getOutput
(
this
->
getName
());
arg
.
grad
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
in
);
}
/**
* print info about sizes
*/
...
...
@@ -334,6 +422,16 @@ private:
}
}
/**
* Set output map of prev layers.
*/
void
setOutputMap
()
{
outputMap_
.
clear
();
for
(
size_t
i
=
0
;
i
<
inputLayers_
.
size
();
++
i
)
{
inputLayers_
[
i
]
->
setOutput
(
getName
(),
&
tmpInArg_
);
}
}
/**
* Check the cpu device number of outputOtherDevice_.
* should have only one at most.
...
...
paddle/gserver/layers/MKLDNNPoolLayer.cpp
浏览文件 @
17b4cea4
...
...
@@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
value
;
cpuOutVal_
=
MKLDNNMatrix
::
create
(
cpuOut
,
outDims
,
format
::
nchw
,
engine_
);
if
(
cpuOutVal_
->
getPrimitiveDesc
()
!=
out
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
out
->
getPrimitiveDesc
());
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
cpuOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be emptry"
;
}
else
{
// CPU output share the same data of MKLDNN output
cpuOut
->
setData
(
out
->
getData
());
cpuOutVal_
=
out
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
cpuOutVal_
);
return
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
outVal_
);
}
void
MKLDNNPoolLayer
::
resetFwdPD
(
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>&
pd
,
...
...
@@ -187,7 +189,6 @@ void MKLDNNPoolLayer::resetFwdPipeline(
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
fwd_
=
workspace_
?
std
::
make_shared
<
pool_fwd
>
(
pool_fwd
(
*
pd
,
*
in
,
*
out
,
*
workspace_
))
:
std
::
make_shared
<
pool_fwd
>
(
pool_fwd
(
*
pd
,
*
in
,
*
out
));
...
...
@@ -205,17 +206,17 @@ void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
resetInGrad
(
in
);
}
void
MKLDNNPoolLayer
::
resetOutGrad
(
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
outVal_
)
<<
"Should have output value"
;
out
=
MKLDNNMatrix
::
create
(
output_
.
grad
,
outVal_
->
getPrimitiveDesc
());
// create reorder if output value has cpu device and pd do not match
cpuOutGrad_
=
nullptr
;
cvtOutGrad_
=
nullptr
;
if
(
!
outputIsOnlyMKLDNN
())
{
CHECK
(
outVal_
);
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
cpuOutGrad_
=
MKLDNNMatrix
::
create
(
cpuOut
,
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
},
format
::
nchw
,
engine_
);
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
!=
out
->
getPrimitiveDesc
())
{
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
!=
outVal_
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
output_
.
grad
,
outVal_
->
getPrimitiveDesc
());
cvtOutGrad_
=
MKLDNNMatrix
::
createReorder
(
cpuOutGrad_
,
out
);
CHECK
(
cvtOutGrad_
)
<<
"should not be emptry"
;
}
else
{
...
...
@@ -228,12 +229,11 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
void
MKLDNNPoolLayer
::
resetInGrad
(
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
const
MatrixPtr
&
inGrad
=
inputLayers_
[
0
]
->
getOutput
().
grad
;
if
(
inGrad
==
nullptr
)
{
if
(
inputLayers_
[
0
]
->
getOutput
().
grad
==
nullptr
)
{
return
;
}
CHECK
(
inVal_
);
in
=
MKLDNNMatrix
::
create
(
inGrad
,
inVal_
->
getPrimitiveDesc
());
MKLDNNLayer
::
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
}
void
MKLDNNPoolLayer
::
resetBwdPD
(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
...
...
@@ -261,7 +261,6 @@ void MKLDNNPoolLayer::resetBwdPipeline(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
if
(
cvtOutGrad_
)
{
pipeline
.
push_back
(
*
cvtOutGrad_
);
}
...
...
paddle/gserver/tests/MKLDNNTester.cpp
浏览文件 @
17b4cea4
...
...
@@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() {
void
MKLDNNTester
::
checkForward
()
{
VLOG
(
MKLDNN_ALL
)
<<
"Check Forward"
;
printTopDatas
();
double
delta
=
compareMatrix
(
dnnLayer_
->
getOutput
(
CPU_DEVICE
).
value
,
refLayer_
->
getOutputValue
());
double
delta
=
compareMatrix
(
dnnLayer_
->
getOutputValue
(),
refLayer_
->
getOutputValue
());
EXPECT_LE
(
fabs
(
delta
),
eps_
);
}
...
...
paddle/trainer/tests/CMakeLists.txt
浏览文件 @
17b4cea4
...
...
@@ -39,15 +39,18 @@ add_test(NAME test_CompareTwoNets
################ test_CompareMKLDNNandCPU ######################
if
(
WITH_MKLDNN
)
add_unittest_without_exec
(
test_CompareMKLDNNandCPU
test_CompareTwoNets.cpp
)
add_test
(
NAME test_CompareMKLDNNandCPU
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_SOURCE_DIR
}
/python/
${
CMAKE_CURRENT_BINARY_DIR
}
/test_CompareMKLDNNandCPU
--config_file_a=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_a=True
--config_file_b=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_b=False
--use_gpu=False
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle/
)
macro
(
gen_command VAR_NAME CONFIG_FILE
)
set
(
${
VAR_NAME
}
"
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh"
"-d"
"
${
PADDLE_SOURCE_DIR
}
/python/"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/test_CompareMKLDNNandCPU --use_gpu=False"
"--config_file_a=trainer/tests/
${
CONFIG_FILE
}
--use_mkldnn_a=True"
"--config_file_b=trainer/tests/
${
CONFIG_FILE
}
--use_mkldnn_b=False"
"WORKING_DIRECTORY"
"
${
PADDLE_SOURCE_DIR
}
/paddle/"
)
endmacro
()
add_unittest_without_exec
(
test_CompareMKLDNNandCPU test_CompareTwoNets.cpp
)
gen_command
(
compare_simple_net
"sample_trainer_config_simple_net.conf"
)
gen_command
(
compare_branch_net
"sample_trainer_config_branch_net.conf"
)
add_test
(
NAME test_CompareMKLDNNandCPU_simple_net COMMAND
${
compare_simple_net
}
)
add_test
(
NAME test_CompareMKLDNNandCPU_branch_net COMMAND
${
compare_branch_net
}
)
endif
()
############### test_CompareTwoOpts ###################
...
...
paddle/trainer/tests/sample_trainer_config_branch_net.conf
0 → 100644
浏览文件 @
17b4cea4
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle
.
trainer_config_helpers
import
*
################################### Data Configuration ###################################
TrainData
(
ProtoData
(
files
=
"trainer/tests/mnist.list"
))
################################### Algorithm Configuration ###################################
settings
(
batch_size
=
256
,
learning_method
=
MomentumOptimizer
(
momentum
=
0
.
5
,
sparse
=
False
))
################################### Network Configuration ###################################
data
=
data_layer
(
name
=
"input"
,
size
=
784
)
tmp
=
img_conv_layer
(
input
=
data
,
num_channels
=
1
,
filter_size
=
3
,
num_filters
=
32
,
padding
=
1
,
shared_biases
=
True
,
act
=
ReluActivation
())
a1
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
1
,
num_filters
=
32
,
padding
=
0
,
shared_biases
=
True
,
act
=
ReluActivation
())
a2
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
3
,
num_filters
=
32
,
padding
=
1
,
shared_biases
=
True
,
act
=
ReluActivation
())
tmp
=
concat_layer
(
input
=[
a1
,
a2
])
tmp
=
img_pool_layer
(
input
=
tmp
,
num_channels
=
64
,
pool_size
=
3
,
stride
=
2
,
padding
=
1
,
pool_type
=
AvgPooling
())
b1
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
3
,
num_filters
=
64
,
padding
=
1
,
shared_biases
=
True
,
act
=
ReluActivation
())
b1
=
img_pool_layer
(
input
=
b1
,
pool_size
=
3
,
stride
=
1
,
padding
=
1
,
pool_type
=
MaxPooling
())
b2
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
5
,
num_filters
=
64
,
padding
=
2
,
shared_biases
=
True
,
act
=
ReluActivation
())
b2
=
img_pool_layer
(
input
=
b2
,
pool_size
=
5
,
stride
=
1
,
padding
=
2
,
pool_type
=
MaxPooling
())
tmp
=
addto_layer
(
input
=[
b1
,
b2
],
act
=
ReluActivation
(),
bias_attr
=
False
)
tmp
=
img_pool_layer
(
input
=
tmp
,
pool_size
=
3
,
stride
=
2
,
padding
=
1
,
pool_type
=
MaxPooling
())
tmp
=
fc_layer
(
input
=
tmp
,
size
=
64
,
bias_attr
=
False
,
act
=
TanhActivation
())
output
=
fc_layer
(
input
=
tmp
,
size
=
10
,
bias_attr
=
True
,
act
=
SoftmaxActivation
())
lbl
=
data_layer
(
name
=
"label"
,
size
=
10
)
cost
=
classification_cost
(
input
=
output
,
label
=
lbl
)
outputs
(
cost
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录