Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
17b4cea4
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
17b4cea4
编写于
10月 15, 2017
作者:
T
Tao Luo
提交者:
GitHub
10月 15, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4759 from tensor-tang/merge_grad
Merge mkldnn output grad
上级
e593113a
7a7c8fd9
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
285 addition
and
88 deletion
+285
-88
paddle/gserver/layers/Layer.h
paddle/gserver/layers/Layer.h
+6
-0
paddle/gserver/layers/MKLDNNConvLayer.cpp
paddle/gserver/layers/MKLDNNConvLayer.cpp
+37
-39
paddle/gserver/layers/MKLDNNFcLayer.cpp
paddle/gserver/layers/MKLDNNFcLayer.cpp
+11
-21
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+102
-4
paddle/gserver/layers/MKLDNNPoolLayer.cpp
paddle/gserver/layers/MKLDNNPoolLayer.cpp
+12
-13
paddle/gserver/tests/MKLDNNTester.cpp
paddle/gserver/tests/MKLDNNTester.cpp
+2
-2
paddle/trainer/tests/CMakeLists.txt
paddle/trainer/tests/CMakeLists.txt
+12
-9
paddle/trainer/tests/sample_trainer_config_branch_net.conf
paddle/trainer/tests/sample_trainer_config_branch_net.conf
+103
-0
未找到文件。
paddle/gserver/layers/Layer.h
浏览文件 @
17b4cea4
...
...
@@ -86,6 +86,7 @@ protected:
/// Also used in 'use_mkldnn' case.
std
::
vector
<
Argument
>
outputOtherDevice_
;
/// If there are several outputs, map them by each name.
/// MKLDNNLayer use it only to merge output grad
std
::
map
<
std
::
string
,
Argument
*>
outputMap_
;
/// Used to merge grad on different devices.
MatrixPtr
tmpGrad_
;
...
...
@@ -325,6 +326,11 @@ public:
outputMap_
[
name
]
=
output
;
}
/**
* Get the output map size, if layer has multi-output.
*/
size_t
getOutputMapSize
()
{
return
outputMap_
.
size
();
}
/**
* Get the output based on layer's name.
*/
...
...
paddle/gserver/layers/MKLDNNConvLayer.cpp
浏览文件 @
17b4cea4
...
...
@@ -225,8 +225,6 @@ void MKLDNNConvLayer::resetFwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
if
(
cvtInVal_
)
{
pipeline
.
push_back
(
*
cvtInVal_
);
}
...
...
@@ -245,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline(
void
MKLDNNConvLayer
::
resetInValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
)
{
const
MatrixPtr
&
inMat
=
inputLayers_
[
0
]
->
getOutput
().
value
;
const
MatrixPtr
&
inMat
=
inputLayers_
[
0
]
->
getOutput
Value
()
;
in
=
MKLDNNMatrix
::
create
(
inMat
,
pd
->
src_primitive_desc
());
// create buffer and reorder if input value do not match
...
...
@@ -310,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue(
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
value
;
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
cpuOutVal_
=
MKLDNNMatrix
::
create
(
cpuOut
,
outDims
,
format
::
nchw
,
engine_
);
if
(
cpuOutVal_
->
getPrimitiveDesc
()
!=
out
->
getPrimitiveDesc
())
{
if
(
cpuOutVal_
->
getPrimitiveDesc
()
!=
pd
->
dst_primitive_desc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
pd
->
dst_primitive_desc
());
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
cpuOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be empt
r
y"
;
CHECK
(
cvtOutVal_
)
<<
"should not be empty"
;
}
else
{
// CPU output share the same data of MKLDNN output
cpuOut
->
setData
(
out
->
getData
());
cpuOutVal_
=
out
;
}
// when output is cpu device, change the mkldnn output value and make them
// share the same data. Then if next layer use inputlayer->getOuputValue()
// to achieve the input value, it will get the right data.
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
cpuOutVal_
);
return
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
}
void
MKLDNNConvLayer
::
resetBwdWgtPD
(
...
...
@@ -412,8 +415,6 @@ void MKLDNNConvLayer::resetBwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
if
(
cvtOutGrad_
)
{
pipeline
.
push_back
(
*
cvtOutGrad_
);
}
...
...
@@ -446,28 +447,27 @@ void MKLDNNConvLayer::resetBwdPipeline(
void
MKLDNNConvLayer
::
resetOutGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
out
)
{
const
MatrixPtr
&
outMat
=
output_
.
grad
;
out
=
MKLDNNMatrix
::
create
(
outMat
,
wgtPD
->
diff_dst_primitive_desc
());
CHECK
(
outVal_
!=
nullptr
&&
out
->
getPrimitiveDesc
()
==
outVal_
->
getPrimitiveDesc
())
<<
"primitive desc of out grad and value should be equal"
;
// TODO(TJ): merge outgrad
// create reorder if has output grad does not match
cpuOutGrad_
=
nullptr
;
cvtOutGrad_
=
nullptr
;
if
(
!
outputIsOnlyMKLDNN
())
{
CHECK
(
outVal_
!=
nullptr
&&
outVal_
->
getPrimitiveDesc
()
==
wgtPD
->
diff_dst_primitive_desc
())
<<
"primitive desc of out grad and value should be equal"
;
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
outMat
->
setData
(
cpuOut
->
getData
());
// same PrimitiveDesc with cpuInVal_
CHECK
(
cpuOutVal_
);
cpuOutGrad_
=
MKLDNNMatrix
::
create
(
cpuOut
,
cpuOutVal_
->
getPrimitiveDesc
());
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
==
out
->
getPrimitiveDesc
())
{
out
=
cpuOutGrad_
;
}
else
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
wgtPD
->
diff_dst_primitive_desc
());
// create reorder if primitive desc does not match
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
!=
outVal_
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
output_
.
grad
,
outVal_
->
getPrimitiveDesc
());
cvtOutGrad_
=
MKLDNNMatrix
::
createReorder
(
cpuOutGrad_
,
out
);
CHECK
(
cvtOutGrad_
);
}
else
{
// share the same data of CPU output
output_
.
grad
->
setData
(
cpuOut
->
getData
());
out
=
cpuOutGrad_
;
}
}
}
...
...
@@ -496,32 +496,30 @@ void MKLDNNConvLayer::resetWgtBiasGrad(
void
MKLDNNConvLayer
::
resetInGrad
(
std
::
shared_ptr
<
conv_bwdData
::
primitive_desc
>&
dataPD
,
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
cpuInGrad_
=
nullptr
;
cvtInGrad_
=
nullptr
;
if
(
dataPD
==
nullptr
)
{
return
;
}
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
in
=
MKLDNNMatrix
::
create
(
inputLayers_
[
0
]
->
getOutput
().
grad
,
dataPD
->
diff_src_primitive_desc
());
CHECK
(
nullptr
!=
inVal_
&&
in
->
getPrimitiveDesc
()
==
inVal_
->
getPrimitiveDesc
())
<<
"primitive desc of input grad and value should be equal"
;
// create reorder if has output grad does not match
cpuInGrad_
=
nullptr
;
cvtInGrad_
=
nullptr
;
if
(
!
inputIsOnlyMKLDNN
())
{
if
(
inputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetInGrad
(
in
,
dataPD
->
diff_src_primitive_desc
());
CHECK
(
nullptr
!=
inVal_
&&
in
->
getPrimitiveDesc
()
==
inVal_
->
getPrimitiveDesc
())
<<
"primitive desc of input grad and value should be equal"
;
}
else
{
const
MatrixPtr
&
cpuIn
=
getInputGrad
(
0
,
CPU_DEVICE
);
// same PrimitiveDesc with cpuInVal_
CHECK
(
cpuInVal_
);
cpuInGrad_
=
MKLDNNMatrix
::
create
(
cpuIn
,
cpuInVal_
->
getPrimitiveDesc
());
if
(
cpuInGrad_
->
getPrimitiveDesc
()
!=
in
->
getPrimitiveDesc
())
{
const
MatrixPtr
&
dnnIn
=
getInputGrad
(
0
,
MKLDNN_DEVICE
);
in
=
MKLDNNMatrix
::
create
(
dnnIn
,
in
->
getPrimitiveDesc
());
in
=
cpuInGrad_
;
// create reorder if PrimitiveDesc does not match
if
(
cpuInGrad_
->
getPrimitiveDesc
()
!=
dataPD
->
diff_src_primitive_desc
())
{
in
=
MKLDNNMatrix
::
create
(
getInputGrad
(
0
,
MKLDNN_DEVICE
),
dataPD
->
diff_src_primitive_desc
());
cvtInGrad_
=
MKLDNNMatrix
::
createReorder
(
in
,
cpuInGrad_
);
CHECK
(
cvtInGrad_
);
}
else
{
in
=
cpuInGrad_
;
}
}
}
...
...
paddle/gserver/layers/MKLDNNFcLayer.cpp
浏览文件 @
17b4cea4
...
...
@@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
void
MKLDNNFcLayer
::
resetOutValue
(
MKLDNNMatrixPtr
&
out
)
{
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
if
(
!
outputIsOnlyMKLDNN
())
{
// fc cpu output value do not need create convert
// just share point
// fc cpu output value do not need create convert, just share data
getOutput
(
CPU_DEVICE
).
value
->
setData
(
out
->
getData
());
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
}
void
MKLDNNFcLayer
::
resetFwdPD
(
std
::
shared_ptr
<
fc_fwd
::
primitive_desc
>&
pd
,
...
...
@@ -214,8 +214,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
if
(
bias
)
{
fwd_
.
reset
(
new
fc_fwd
(
*
pd
,
*
in
,
*
wgt
,
*
bias
,
*
out
));
}
else
{
...
...
@@ -237,19 +235,14 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
}
void
MKLDNNFcLayer
::
resetOutGrad
(
MKLDNNMatrixPtr
&
out
)
{
// TODO(TJ): merge outgrad
int
device
=
outputIsOnlyMKLDNN
()
?
MKLDNN_DEVICE
:
CPU_DEVICE
;
output_
.
grad
->
setData
(
getOutput
(
device
).
grad
->
getData
());
// for MKLDNN device:
// can not directly cast outputgrad to mkldnnmatrix,
// since each layer can not write the inputgrad to mkldnn inputgrad.
// So just create from matrix with outputvalue format.
// for CPU device:
// fc do not need to convert from cpu device since output is always nc format
// only need create from cpu device
CHECK
(
outVal_
);
out
=
MKLDNNMatrix
::
create
(
getOutput
(
device
).
grad
,
outVal_
->
getPrimitiveDesc
());
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
output_
.
grad
->
setData
(
cpuOut
->
getData
());
out
=
MKLDNNMatrix
::
create
(
cpuOut
,
outVal_
->
getPrimitiveDesc
());
}
}
void
MKLDNNFcLayer
::
resetWgtBiasGrad
(
MKLDNNMatrixPtr
&
wgt
,
...
...
@@ -267,13 +260,11 @@ void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
void
MKLDNNFcLayer
::
resetInGrad
(
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
const
MatrixPtr
&
inGrad
=
inputLayers_
[
0
]
->
getOutput
().
grad
;
if
(
inGrad
==
nullptr
)
{
if
(
inputLayers_
[
0
]
->
getOutput
().
grad
==
nullptr
)
{
return
;
}
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
CHECK
(
inVal_
);
in
=
MKLDNNMatrix
::
create
(
inGrad
,
inVal_
->
getPrimitiveDesc
());
MKLDNNLayer
::
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
}
void
MKLDNNFcLayer
::
resetBwdWgtPD
(
...
...
@@ -314,7 +305,6 @@ void MKLDNNFcLayer::resetBwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
CHECK
(
inVal_
);
if
(
bias
)
{
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
*
bwdWgtPD
,
*
inVal_
,
*
out
,
*
wgt
,
*
bias
));
...
...
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
17b4cea4
...
...
@@ -65,6 +65,17 @@ protected:
MKLDNNMatrixPtr
biasVal_
;
MKLDNNMatrixPtr
biasGrad_
;
// merge grad primitive
std
::
shared_ptr
<
mkldnn
::
primitive
>
mergeGrad_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineMergeGrad_
;
// tmp input argument to save input grad, only used to merge grad
Argument
tmpInArg_
;
// since mkldnn sum do not support different formats:
// can refer to https://github.com/01org/mkl-dnn/issues/134
// so need create reorder manually and save tmp MKLDNNMatrix
MKLDNNMatrixPtr
tmpOutGrad_
;
std
::
shared_ptr
<
mkldnn
::
primitive
>
tmpCvt_
;
public:
explicit
MKLDNNLayer
(
const
LayerConfig
&
config
)
:
Layer
(
config
),
...
...
@@ -99,6 +110,7 @@ public:
if
(
!
Layer
::
init
(
layerMap
,
parameterMap
))
{
return
false
;
}
setOutputMap
();
checkCPUOutputsNumber
();
stream_
.
reset
(
new
MKLDNNStream
());
...
...
@@ -118,12 +130,9 @@ public:
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn forward"
;
// reset when input total sizes changed, not only the batchsize
inputElemenCnt_
=
elemenCnt
;
pipelineFwd_
.
clear
();
reshape
(
bs_
,
ic_
,
ih_
,
iw_
,
oc_
,
oh_
,
ow_
);
resetFwd
(
pipelineFwd_
,
inVal_
,
wgtVal_
,
biasVal_
,
outVal_
);
if
(
outVal_
)
{
// change original output value to mkldnn output value
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
outVal_
);
}
convertWeightsFromPaddle
();
needResetBwd_
=
true
;
}
...
...
@@ -144,9 +153,18 @@ public:
void
backward
(
const
UpdateCallback
&
callback
)
override
{
if
(
needResetBwd_
)
{
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn backward"
;
pipelineBwd_
.
clear
();
pipelineMergeGrad_
.
clear
();
mergeGrad_
=
nullptr
;
resetBwd
(
pipelineBwd_
,
inGrad_
,
wgtGrad_
,
biasGrad_
,
outGrad_
);
needResetBwd_
=
false
;
}
// merge grad must before backward activation
if
(
mergeGrad_
)
{
REGISTER_TIMER_INFO
(
"MergeBpGrad"
,
getName
().
c_str
());
stream_
->
submit
(
pipelineMergeGrad_
);
}
{
REGISTER_TIMER_INFO
(
"BpActTimer"
,
getName
().
c_str
());
backwardActivation
();
...
...
@@ -247,6 +265,76 @@ protected:
}
}
/**
* reset the output grad matrix from primitive desc.
* and reset the merge grad primitive if needed.
* note: when this layer has serval outputs,
* it could not be mixed with cpu device,
* since it can not get memory desc from cpu device.
*/
virtual
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
,
mkldnn
::
memory
::
primitive_desc
pd
)
{
CHECK
(
outputIsOnlyMKLDNN
())
<<
"do not support mixed with other device yet"
;
mergeGrad_
=
nullptr
;
pipelineMergeGrad_
.
clear
();
out
=
MKLDNNMatrix
::
create
(
output_
.
grad
,
pd
);
if
(
outputMap_
.
size
()
<=
1
)
{
return
;
}
std
::
vector
<
double
>
scales
(
outputMap_
.
size
(),
1.0
);
std
::
vector
<
mkldnn
::
memory
::
primitive_desc
>
srcPDs
;
std
::
vector
<
mkldnn
::
primitive
::
at
>
srcs
;
for
(
auto
it
=
outputMap_
.
begin
();
it
!=
outputMap_
.
end
();
++
it
)
{
MKLDNNMatrixPtr
src
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
it
->
second
->
grad
);
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" has output grad "
<<
it
->
first
;
CHECK
(
src
)
<<
"should be MKLDNNMatrix"
;
auto
srcDims
=
src
->
getDims
();
auto
dstDims
=
out
->
getDims
();
CHECK_EQ
(
srcDims
.
size
(),
dstDims
.
size
());
for
(
size_t
i
=
0
;
i
<
srcDims
.
size
();
++
i
)
{
CHECK_EQ
(
srcDims
[
i
],
dstDims
[
i
]);
}
srcPDs
.
push_back
(
src
->
getPrimitiveDesc
());
srcs
.
push_back
(
*
src
);
}
// TODO(TJ): remove me when mkldnn sum support different formats
for
(
size_t
i
=
1
;
i
<
srcPDs
.
size
();
++
i
)
{
CHECK
(
srcPDs
[
0
]
==
srcPDs
[
i
]);
}
tmpOutGrad_
=
nullptr
;
tmpCvt_
=
nullptr
;
if
(
out
->
getPrimitiveDesc
()
!=
srcPDs
[
0
])
{
tmpOutGrad_
=
MKLDNNMatrix
::
create
(
nullptr
,
srcPDs
[
0
]);
tmpCvt_
=
MKLDNNMatrix
::
createReorder
(
tmpOutGrad_
,
out
);
CHECK
(
tmpCvt_
);
pipelineMergeGrad_
.
push_back
(
*
tmpCvt_
);
}
else
{
tmpOutGrad_
=
out
;
}
auto
sumPD
=
mkldnn
::
sum
::
primitive_desc
(
tmpOutGrad_
->
getMemoryDesc
(),
scales
,
srcPDs
);
mergeGrad_
.
reset
(
new
mkldnn
::
sum
(
sumPD
,
srcs
,
*
tmpOutGrad_
));
pipelineMergeGrad_
.
insert
(
pipelineMergeGrad_
.
begin
(),
*
mergeGrad_
);
}
/**
* reset input grad from primitive desc.
* this function is avaiable for input is only mkldnn
* or input do not care cpu device
*/
virtual
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
,
mkldnn
::
memory
::
primitive_desc
pd
)
{
LayerPtr
&
input
=
inputLayers_
[
0
];
const
MatrixPtr
&
grad
=
input
->
getOutputMapSize
()
>
1
?
nullptr
:
input
->
getOutput
().
grad
;
in
=
MKLDNNMatrix
::
create
(
grad
,
pd
);
Argument
&
arg
=
input
->
getOutput
(
this
->
getName
());
arg
.
grad
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
in
);
}
/**
* print info about sizes
*/
...
...
@@ -334,6 +422,16 @@ private:
}
}
/**
* Set output map of prev layers.
*/
void
setOutputMap
()
{
outputMap_
.
clear
();
for
(
size_t
i
=
0
;
i
<
inputLayers_
.
size
();
++
i
)
{
inputLayers_
[
i
]
->
setOutput
(
getName
(),
&
tmpInArg_
);
}
}
/**
* Check the cpu device number of outputOtherDevice_.
* should have only one at most.
...
...
paddle/gserver/layers/MKLDNNPoolLayer.cpp
浏览文件 @
17b4cea4
...
...
@@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
value
;
cpuOutVal_
=
MKLDNNMatrix
::
create
(
cpuOut
,
outDims
,
format
::
nchw
,
engine_
);
if
(
cpuOutVal_
->
getPrimitiveDesc
()
!=
out
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
out
->
getPrimitiveDesc
());
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
cpuOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be emptry"
;
}
else
{
// CPU output share the same data of MKLDNN output
cpuOut
->
setData
(
out
->
getData
());
cpuOutVal_
=
out
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
cpuOutVal_
);
return
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
outVal_
);
}
void
MKLDNNPoolLayer
::
resetFwdPD
(
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>&
pd
,
...
...
@@ -187,7 +189,6 @@ void MKLDNNPoolLayer::resetFwdPipeline(
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
fwd_
=
workspace_
?
std
::
make_shared
<
pool_fwd
>
(
pool_fwd
(
*
pd
,
*
in
,
*
out
,
*
workspace_
))
:
std
::
make_shared
<
pool_fwd
>
(
pool_fwd
(
*
pd
,
*
in
,
*
out
));
...
...
@@ -205,17 +206,17 @@ void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
resetInGrad
(
in
);
}
void
MKLDNNPoolLayer
::
resetOutGrad
(
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
outVal_
)
<<
"Should have output value"
;
out
=
MKLDNNMatrix
::
create
(
output_
.
grad
,
outVal_
->
getPrimitiveDesc
());
// create reorder if output value has cpu device and pd do not match
cpuOutGrad_
=
nullptr
;
cvtOutGrad_
=
nullptr
;
if
(
!
outputIsOnlyMKLDNN
())
{
CHECK
(
outVal_
);
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
cpuOutGrad_
=
MKLDNNMatrix
::
create
(
cpuOut
,
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
},
format
::
nchw
,
engine_
);
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
!=
out
->
getPrimitiveDesc
())
{
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
!=
outVal_
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
output_
.
grad
,
outVal_
->
getPrimitiveDesc
());
cvtOutGrad_
=
MKLDNNMatrix
::
createReorder
(
cpuOutGrad_
,
out
);
CHECK
(
cvtOutGrad_
)
<<
"should not be emptry"
;
}
else
{
...
...
@@ -228,12 +229,11 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
void
MKLDNNPoolLayer
::
resetInGrad
(
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
const
MatrixPtr
&
inGrad
=
inputLayers_
[
0
]
->
getOutput
().
grad
;
if
(
inGrad
==
nullptr
)
{
if
(
inputLayers_
[
0
]
->
getOutput
().
grad
==
nullptr
)
{
return
;
}
CHECK
(
inVal_
);
in
=
MKLDNNMatrix
::
create
(
inGrad
,
inVal_
->
getPrimitiveDesc
());
MKLDNNLayer
::
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
}
void
MKLDNNPoolLayer
::
resetBwdPD
(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
...
...
@@ -261,7 +261,6 @@ void MKLDNNPoolLayer::resetBwdPipeline(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
if
(
cvtOutGrad_
)
{
pipeline
.
push_back
(
*
cvtOutGrad_
);
}
...
...
paddle/gserver/tests/MKLDNNTester.cpp
浏览文件 @
17b4cea4
...
...
@@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() {
void
MKLDNNTester
::
checkForward
()
{
VLOG
(
MKLDNN_ALL
)
<<
"Check Forward"
;
printTopDatas
();
double
delta
=
compareMatrix
(
dnnLayer_
->
getOutput
(
CPU_DEVICE
).
value
,
refLayer_
->
getOutputValue
());
double
delta
=
compareMatrix
(
dnnLayer_
->
getOutputValue
(),
refLayer_
->
getOutputValue
());
EXPECT_LE
(
fabs
(
delta
),
eps_
);
}
...
...
paddle/trainer/tests/CMakeLists.txt
浏览文件 @
17b4cea4
...
...
@@ -39,15 +39,18 @@ add_test(NAME test_CompareTwoNets
################ test_CompareMKLDNNandCPU ######################
if
(
WITH_MKLDNN
)
add_unittest_without_exec
(
test_CompareMKLDNNandCPU
test_CompareTwoNets.cpp
)
add_test
(
NAME test_CompareMKLDNNandCPU
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_SOURCE_DIR
}
/python/
${
CMAKE_CURRENT_BINARY_DIR
}
/test_CompareMKLDNNandCPU
--config_file_a=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_a=True
--config_file_b=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_b=False
--use_gpu=False
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle/
)
macro
(
gen_command VAR_NAME CONFIG_FILE
)
set
(
${
VAR_NAME
}
"
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh"
"-d"
"
${
PADDLE_SOURCE_DIR
}
/python/"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/test_CompareMKLDNNandCPU --use_gpu=False"
"--config_file_a=trainer/tests/
${
CONFIG_FILE
}
--use_mkldnn_a=True"
"--config_file_b=trainer/tests/
${
CONFIG_FILE
}
--use_mkldnn_b=False"
"WORKING_DIRECTORY"
"
${
PADDLE_SOURCE_DIR
}
/paddle/"
)
endmacro
()
add_unittest_without_exec
(
test_CompareMKLDNNandCPU test_CompareTwoNets.cpp
)
gen_command
(
compare_simple_net
"sample_trainer_config_simple_net.conf"
)
gen_command
(
compare_branch_net
"sample_trainer_config_branch_net.conf"
)
add_test
(
NAME test_CompareMKLDNNandCPU_simple_net COMMAND
${
compare_simple_net
}
)
add_test
(
NAME test_CompareMKLDNNandCPU_branch_net COMMAND
${
compare_branch_net
}
)
endif
()
############### test_CompareTwoOpts ###################
...
...
paddle/trainer/tests/sample_trainer_config_branch_net.conf
0 → 100644
浏览文件 @
17b4cea4
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle
.
trainer_config_helpers
import
*
################################### Data Configuration ###################################
TrainData
(
ProtoData
(
files
=
"trainer/tests/mnist.list"
))
################################### Algorithm Configuration ###################################
settings
(
batch_size
=
256
,
learning_method
=
MomentumOptimizer
(
momentum
=
0
.
5
,
sparse
=
False
))
################################### Network Configuration ###################################
data
=
data_layer
(
name
=
"input"
,
size
=
784
)
tmp
=
img_conv_layer
(
input
=
data
,
num_channels
=
1
,
filter_size
=
3
,
num_filters
=
32
,
padding
=
1
,
shared_biases
=
True
,
act
=
ReluActivation
())
a1
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
1
,
num_filters
=
32
,
padding
=
0
,
shared_biases
=
True
,
act
=
ReluActivation
())
a2
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
3
,
num_filters
=
32
,
padding
=
1
,
shared_biases
=
True
,
act
=
ReluActivation
())
tmp
=
concat_layer
(
input
=[
a1
,
a2
])
tmp
=
img_pool_layer
(
input
=
tmp
,
num_channels
=
64
,
pool_size
=
3
,
stride
=
2
,
padding
=
1
,
pool_type
=
AvgPooling
())
b1
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
3
,
num_filters
=
64
,
padding
=
1
,
shared_biases
=
True
,
act
=
ReluActivation
())
b1
=
img_pool_layer
(
input
=
b1
,
pool_size
=
3
,
stride
=
1
,
padding
=
1
,
pool_type
=
MaxPooling
())
b2
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
5
,
num_filters
=
64
,
padding
=
2
,
shared_biases
=
True
,
act
=
ReluActivation
())
b2
=
img_pool_layer
(
input
=
b2
,
pool_size
=
5
,
stride
=
1
,
padding
=
2
,
pool_type
=
MaxPooling
())
tmp
=
addto_layer
(
input
=[
b1
,
b2
],
act
=
ReluActivation
(),
bias_attr
=
False
)
tmp
=
img_pool_layer
(
input
=
tmp
,
pool_size
=
3
,
stride
=
2
,
padding
=
1
,
pool_type
=
MaxPooling
())
tmp
=
fc_layer
(
input
=
tmp
,
size
=
64
,
bias_attr
=
False
,
act
=
TanhActivation
())
output
=
fc_layer
(
input
=
tmp
,
size
=
10
,
bias_attr
=
True
,
act
=
SoftmaxActivation
())
lbl
=
data_layer
(
name
=
"label"
,
size
=
10
)
cost
=
classification_cost
(
input
=
output
,
label
=
lbl
)
outputs
(
cost
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录