BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)

Commit
6715beaa
Authored Oct 12, 2017 by tensor-tang
enable merge output grad of mkldnn
Parent: 6604d7cd
Showing 5 changed files with 120 additions and 64 deletions (+120 -64)
paddle/gserver/layers/Layer.h              +6   -0
paddle/gserver/layers/MKLDNNConvLayer.cpp  +27  -34
paddle/gserver/layers/MKLDNNFcLayer.cpp    +9   -19
paddle/gserver/layers/MKLDNNLayer.h        +70  -0
paddle/gserver/layers/MKLDNNPoolLayer.cpp  +8   -11
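Taken together, the hunks below move the shared resetOutGrad/resetInGrad logic into MKLDNNLayer and let a layer whose output feeds several consumers merge the per-consumer gradients with an mkldnn sum primitive, instead of leaving the merge as a TODO. Conceptually the merge is an elementwise sum with unit scales; a minimal plain-C++ sketch of that idea (illustrative names, not part of the commit):

#include <cstddef>
#include <vector>

// dst = sum_i (scale_i * src_i); this commit always uses scale_i == 1.0,
// so the merged gradient is just the elementwise sum of each branch's grad.
void mergeGrads(const std::vector<std::vector<float>>& srcs,
                std::vector<float>& dst) {
  for (float& v : dst) v = 0.0f;
  for (const std::vector<float>& src : srcs) {
    for (size_t i = 0; i < dst.size(); ++i) {
      dst[i] += src[i];
    }
  }
}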
paddle/gserver/layers/Layer.h

@@ -86,6 +86,7 @@ protected:
   /// Also used in 'use_mkldnn' case.
   std::vector<Argument> outputOtherDevice_;
   /// If there are several outputs, map them by each name.
+  /// MKLDNNLayer uses it only to merge output grad.
   std::map<std::string, Argument*> outputMap_;
   /// Used to merge grad on different devices.
   MatrixPtr tmpGrad_;

@@ -325,6 +326,11 @@ public:
     outputMap_[name] = output;
   }

+  /**
+   * Get the output map size, if layer has multi-output.
+   */
+  size_t getOutputMapSize() { return outputMap_.size(); }
+
   /**
    * Get the output based on layer's name.
    */
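How the map gets filled: at init time each MKLDNN layer registers itself with every input layer via setOutput, handing over an Argument whose grad slot it will fill (see MKLDNNLayer::setOutputMap further below). A hedged fragment, with prevLayer/nextLayer as illustrative names, assuming the Paddle Layer API shown above:

// Not a verbatim part of the commit; sketches the registration step.
Argument branchGrad;  // plays the role of MKLDNNLayer::tmpInArg_
prevLayer->setOutput(nextLayer->getName(), &branchGrad);  // fills prevLayer's outputMap_
if (prevLayer->getOutputMapSize() > 1) {
  // More than one registered consumer: prevLayer must sum the per-consumer
  // grads before its output grad is usable (see MKLDNNLayer::resetOutGrad).
}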
paddle/gserver/layers/MKLDNNConvLayer.cpp

@@ -225,8 +225,6 @@ void MKLDNNConvLayer::resetFwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
-
   if (cvtInVal_) {
     pipeline.push_back(*cvtInVal_);
   }
@@ -412,8 +410,6 @@ void MKLDNNConvLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
-
   if (cvtOutGrad_) {
     pipeline.push_back(*cvtOutGrad_);
   }
@@ -446,28 +442,27 @@
 void MKLDNNConvLayer::resetOutGrad(
     std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) {
-  const MatrixPtr& outMat = output_.grad;
-  out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc());
-  CHECK(outVal_ != nullptr &&
-        out->getPrimitiveDesc() == outVal_->getPrimitiveDesc())
-      << "primitive desc of out grad and value should be equal";
-
-  // TODO(TJ): merge outgrad
-  // create reorder if output grad does not match
   cpuOutGrad_ = nullptr;
   cvtOutGrad_ = nullptr;
-  if (!outputIsOnlyMKLDNN()) {
+  CHECK(outVal_ != nullptr &&
+        outVal_->getPrimitiveDesc() == wgtPD->diff_dst_primitive_desc())
+      << "primitive desc of out grad and value should be equal";
+  if (outputIsOnlyMKLDNN()) {
+    MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
+  } else {
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
-    outMat->setData(cpuOut->getData());
     // same PrimitiveDesc with cpuInVal_
     CHECK(cpuOutVal_);
     cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
-    if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) {
-      out = cpuOutGrad_;
-    } else {
-      out = MKLDNNMatrix::create(nullptr, wgtPD->diff_dst_primitive_desc());
+    // create reorder if primitive desc does not match
+    if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
+      out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
       cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
       CHECK(cvtOutGrad_);
+    } else {
+      // share the same data of CPU output
+      output_.grad->setData(cpuOut->getData());
+      out = cpuOutGrad_;
     }
   }
 }
@@ -496,32 +491,30 @@ void MKLDNNConvLayer::resetWgtBiasGrad(
 void MKLDNNConvLayer::resetInGrad(
     std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
     MKLDNNMatrixPtr& in) {
   in = nullptr;
+  cpuInGrad_ = nullptr;
+  cvtInGrad_ = nullptr;
   if (dataPD == nullptr) {
     return;
   }
-  // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
-  in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad,
-                            dataPD->diff_src_primitive_desc());
-  CHECK(nullptr != inVal_ &&
-        in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
-      << "primitive desc of input grad and value should be equal";
-
-  // create reorder if input grad does not match
-  cpuInGrad_ = nullptr;
-  cvtInGrad_ = nullptr;
-  if (!inputIsOnlyMKLDNN()) {
+  if (inputIsOnlyMKLDNN()) {
+    MKLDNNLayer::resetInGrad(in, dataPD->diff_src_primitive_desc());
+    CHECK(nullptr != inVal_ &&
+          in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
+        << "primitive desc of input grad and value should be equal";
+  } else {
     const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE);
     // same PrimitiveDesc with cpuInVal_
     CHECK(cpuInVal_);
     cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc());
-    if (cpuInGrad_->getPrimitiveDesc() != in->getPrimitiveDesc()) {
-      const MatrixPtr& dnnIn = getInputGrad(0, MKLDNN_DEVICE);
-      in = MKLDNNMatrix::create(dnnIn, in->getPrimitiveDesc());
+    in = cpuInGrad_;
+    // create reorder if PrimitiveDesc does not match
+    if (cpuInGrad_->getPrimitiveDesc() != dataPD->diff_src_primitive_desc()) {
+      in = MKLDNNMatrix::create(getInputGrad(0, MKLDNN_DEVICE),
+                                dataPD->diff_src_primitive_desc());
       cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_);
       CHECK(cvtInGrad_);
-    } else {
-      in = cpuInGrad_;
     }
   }
 }
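Both resetOutGrad and resetInGrad above now follow one pattern: wrap the gradient buffer shared with the CPU device in the CPU-side primitive desc, and add a reorder only when that layout differs from the one the mkldnn primitive expects. A hedged restatement as a fragment (prepareGradWithReorder is an illustrative name; MKLDNNMatrix, MatrixPtr, and CHECK are the Paddle utilities used in the diff, so this only compiles inside the Paddle tree):

// Fragment; assumes it lives inside an MKLDNNLayer subclass.
void prepareGradWithReorder(const MatrixPtr& cpuGrad,               // host-side grad matrix
                            mkldnn::memory::primitive_desc hostPD,  // layout of the cpu copy
                            mkldnn::memory::primitive_desc dnnPD,   // layout the primitive wants
                            MKLDNNMatrixPtr& hostWrap,
                            MKLDNNMatrixPtr& dnnWrap,
                            std::shared_ptr<mkldnn::reorder>& cvt) {
  cvt = nullptr;
  hostWrap = MKLDNNMatrix::create(cpuGrad, hostPD);
  dnnWrap = hostWrap;  // layouts match: share the buffer, no copy needed
  if (hostPD != dnnPD) {
    dnnWrap = MKLDNNMatrix::create(nullptr, dnnPD);  // own buffer, dnn layout
    cvt = MKLDNNMatrix::createReorder(hostWrap, dnnWrap);
    CHECK(cvt);  // the reorder between the two layouts must exist
  }
}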
paddle/gserver/layers/MKLDNNFcLayer.cpp

@@ -214,8 +214,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
-
   if (bias) {
     fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out));
   } else {
@@ -237,19 +235,14 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
 }

 void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
-  // TODO(TJ): merge outgrad
-  int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
-  output_.grad->setData(getOutput(device).grad->getData());
-  // for MKLDNN device:
-  // can not directly cast outputgrad to mkldnnmatrix,
-  // since each layer can not write the inputgrad to mkldnn inputgrad.
-  // So just create from matrix with outputvalue format.
-  // for CPU device:
-  // fc do not need to convert from cpu device since output is always nc format
-  // only need create from cpu device
   CHECK(outVal_);
-  out =
-      MKLDNNMatrix::create(getOutput(device).grad, outVal_->getPrimitiveDesc());
+  if (outputIsOnlyMKLDNN()) {
+    MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
+  } else {
+    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
+    output_.grad->setData(cpuOut->getData());
+    out = MKLDNNMatrix::create(cpuOut, outVal_->getPrimitiveDesc());
+  }
 }

 void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
@@ -267,13 +260,11 @@ void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
 void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) {
   in = nullptr;
-  const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
-  if (inGrad == nullptr) {
+  if (inputLayers_[0]->getOutput().grad == nullptr) {
     return;
   }
-  // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
   CHECK(inVal_);
-  in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc());
+  MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
 }

 void MKLDNNFcLayer::resetBwdWgtPD(
@@ -314,7 +305,6 @@ void MKLDNNFcLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
   CHECK(inVal_);
   if (bias) {
     bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias));
paddle/gserver/layers/MKLDNNLayer.h

@@ -65,6 +65,11 @@ protected:
   MKLDNNMatrixPtr biasVal_;
   MKLDNNMatrixPtr biasGrad_;

+  // merge grad primitive
+  std::shared_ptr<mkldnn::primitive> mergeGrad_;
+  // tmp input argument to save input grad, only used to merge grad
+  Argument tmpInArg_;
+
 public:
   explicit MKLDNNLayer(const LayerConfig& config)
       : Layer(config),
@@ -99,6 +104,7 @@ public:
     if (!Layer::init(layerMap, parameterMap)) {
       return false;
     }
+    setOutputMap();
     checkCPUOutputsNumber();

     stream_.reset(new MKLDNNStream());
@@ -118,6 +124,7 @@ public:
       VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
       // reset when input total sizes changed, not only the batchsize
       inputElemenCnt_ = elemenCnt;
+      pipelineFwd_.clear();
       reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
       resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
       if (outVal_) {
@@ -144,6 +151,7 @@ public:
   void backward(const UpdateCallback& callback) override {
     if (needResetBwd_) {
       VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward";
+      pipelineBwd_.clear();
       resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
       needResetBwd_ = false;
     }
@@ -247,6 +255,58 @@ protected:
     }
   }

+  /**
+   * reset the output grad matrix from primitive desc,
+   * and reset the merge grad primitive if needed.
+   * note: when this layer has several outputs,
+   *       mixing with a cpu device is not supported,
+   *       because the memory desc cannot be got from a cpu device.
+   */
+  virtual void resetOutGrad(MKLDNNMatrixPtr& out,
+                            mkldnn::memory::primitive_desc pd) {
+    CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet";
+    mergeGrad_ = nullptr;
+    out = MKLDNNMatrix::create(output_.grad, pd);
+    if (outputMap_.size() <= 1) {
+      return;
+    }
+    std::vector<double> scales;
+    std::vector<mkldnn::memory::primitive_desc> srcPDs;
+    std::vector<mkldnn::primitive::at> srcs;
+    for (auto it = outputMap_.begin(); it != outputMap_.end(); ++it) {
+      MKLDNNMatrixPtr src =
+          std::dynamic_pointer_cast<MKLDNNMatrix>(it->second->grad);
+      CHECK(src) << "should be MKLDNNMatrix";
+      auto srcDims = src->getDims();
+      auto dstDims = out->getDims();
+      CHECK_EQ(srcDims.size(), dstDims.size());
+      for (size_t i = 0; i < srcDims.size(); ++i) {
+        CHECK_EQ(srcDims[i], dstDims[i]);
+      }
+      srcPDs.push_back(src->getPrimitiveDesc());
+      srcs.push_back(*src);
+      scales.push_back(1.0);
+    }
+    auto sumPD = mkldnn::sum::primitive_desc(pd.desc(), scales, srcPDs);
+    mergeGrad_.reset(new mkldnn::sum(sumPD, srcs, *out));
+    pipelineBwd_.insert(pipelineBwd_.begin(), *mergeGrad_);
+  }
+
+  /**
+   * reset input grad from primitive desc.
+   * this function is available when the input is only mkldnn,
+   * or when the input does not care about the cpu device.
+   */
+  virtual void resetInGrad(MKLDNNMatrixPtr& in,
+                           mkldnn::memory::primitive_desc pd) {
+    LayerPtr& input = inputLayers_[0];
+    const MatrixPtr& grad =
+        input->getOutputMapSize() > 1 ? nullptr : input->getOutput().grad;
+    in = MKLDNNMatrix::create(grad, pd);
+    auto arg = input->getOutput(this->getName());
+    arg.grad = std::dynamic_pointer_cast<Matrix>(in);
+  }
+
   /**
    * print info about sizes
    */
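For reference, the merge that resetOutGrad builds is the stock mkldnn (v0.x) sum primitive, invoked through the same calls as the hunk above. A self-contained sketch with illustrative sizes (not from the commit):

#include <mkldnn.hpp>
#include <vector>
using namespace mkldnn;

int main() {
  engine eng(engine::cpu, 0);
  // two branch gradients and one destination, all 2x3x4x4 f32 nchw
  memory::desc md({2, 3, 4, 4}, memory::data_type::f32, memory::format::nchw);
  memory::primitive_desc mpd(md, eng);
  memory grad0(mpd), grad1(mpd), dst(mpd);
  std::vector<double> scales = {1.0, 1.0};               // plain sum, as in the diff
  std::vector<memory::primitive_desc> srcPDs = {mpd, mpd};
  std::vector<primitive::at> srcs = {grad0, grad1};
  auto sumPD = sum::primitive_desc(md, scales, srcPDs);  // same call as the hunk above
  sum merge(sumPD, srcs, dst);                           // dst = grad0 + grad1
  stream(stream::kind::eager).submit({merge}).wait();
  return 0;
}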
@@ -334,6 +394,16 @@ private:
     }
   }

+  /**
+   * Set output map of prev layers.
+   */
+  void setOutputMap() {
+    outputMap_.clear();
+    for (size_t i = 0; i < inputLayers_.size(); ++i) {
+      inputLayers_[i]->setOutput(getName(), &tmpInArg_);
+    }
+  }
+
   /**
    * Check the cpu device number of outputOtherDevice_.
    * should have only one at most.
paddle/gserver/layers/MKLDNNPoolLayer.cpp

@@ -187,7 +186,6 @@ void MKLDNNPoolLayer::resetFwdPipeline(
     std::shared_ptr<pool_fwd::primitive_desc>& pd,
     MKLDNNMatrixPtr& in,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
   fwd_ = workspace_
              ? std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out, *workspace_))
              : std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out));
@@ -205,17 +204,17 @@ void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
   resetInGrad(in);
 }

 void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
-  CHECK(outVal_) << "Should have output value";
-  out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
-
-  // create reorder if output value has cpu device and pd does not match
   cpuOutGrad_ = nullptr;
   cvtOutGrad_ = nullptr;
-  if (!outputIsOnlyMKLDNN()) {
+  CHECK(outVal_);
+  if (outputIsOnlyMKLDNN()) {
+    MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
+  } else {
     const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
     cpuOutGrad_ = MKLDNNMatrix::create(
         cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
-    if (cpuOutGrad_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
+    if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
+      out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
       cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
       CHECK(cvtOutGrad_) << "should not be empty";
     } else {
@@ -228,12 +227,11 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
 void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) {
   in = nullptr;
-  const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
-  if (inGrad == nullptr) {
+  if (inputLayers_[0]->getOutput().grad == nullptr) {
     return;
   }
   CHECK(inVal_);
-  in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc());
+  MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
 }

 void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
@@ -261,7 +259,6 @@ void MKLDNNPoolLayer::resetBwdPipeline(
     std::shared_ptr<pool_bwd::primitive_desc>& pd,
     MKLDNNMatrixPtr& in,
     MKLDNNMatrixPtr& out) {
-  pipeline.clear();
   if (cvtOutGrad_) {
     pipeline.push_back(*cvtOutGrad_);
   }
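A closing note on ordering: MKLDNNLayer::resetOutGrad inserts the merge primitive at the front of pipelineBwd_, while the per-layer resetBwdPipeline functions append their primitives afterwards, so the per-consumer gradients are summed into outGrad_ before any backward primitive reads it:

// from MKLDNNLayer::resetOutGrad above
pipelineBwd_.insert(pipelineBwd_.begin(), *mergeGrad_);

This is presumably also why the pipeline.clear() calls moved out of the per-layer resetFwdPipeline/resetBwdPipeline functions and into MKLDNNLayer::forward/backward: clearing inside resetBwdPipeline would have discarded the merge primitive inserted during buffer setup.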