Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
4f0869be
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4f0869be
编写于
9月 11, 2017
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add some necessary params for reset functions
上级
f3bb7b99
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
111 addition
and
84 deletion
+111
-84
paddle/gserver/layers/MKLDNNFcLayer.cpp
paddle/gserver/layers/MKLDNNFcLayer.cpp
+71
-64
paddle/gserver/layers/MKLDNNFcLayer.h
paddle/gserver/layers/MKLDNNFcLayer.h
+14
-5
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+26
-15
未找到文件。
paddle/gserver/layers/MKLDNNFcLayer.cpp
浏览文件 @
4f0869be
...
...
@@ -78,46 +78,52 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
wgtVal_
->
reorderDataTo
(
wgtVal_
,
dstFmt
,
targetDim
);
}
void
MKLDNNFcLayer
::
reshape
()
{
reshapeInput
();
void
MKLDNNFcLayer
::
reshape
(
int
&
bs
,
int
&
ic
,
int
&
ih
,
int
&
iw
,
int
oc
,
int
&
oh
,
int
&
ow
)
{
reshapeInput
(
bs
,
ih
,
iw
);
CHECK_EQ
(
iLayerSize_
,
inputLayers_
[
0
]
->
getSize
());
ic
_
=
iLayerSize_
/
(
ih_
*
iw_
);
CHECK_EQ
(
size_t
(
ic
_
*
ih_
*
iw_
),
iLayerSize_
)
<<
"not divisible"
;
CHECK_EQ
(
size_t
(
oc
_
),
getSize
());
ic
=
iLayerSize_
/
(
ih
*
iw
);
CHECK_EQ
(
size_t
(
ic
*
ih
*
iw
),
iLayerSize_
)
<<
"not divisible"
;
CHECK_EQ
(
size_t
(
oc
),
getSize
());
reshapeOutput
(
oh
_
,
ow_
);
resizeOutput
(
bs
_
,
oc_
);
reshapeOutput
(
oh
,
ow
);
resizeOutput
(
bs
,
oc
);
printSizeInfo
();
}
void
MKLDNNFcLayer
::
resetFwd
()
{
void
MKLDNNFcLayer
::
resetFwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
bool
hasBias
=
biases_
&&
biases_
->
getW
();
const
MatrixPtr
&
wgt
=
weight_
->
getW
();
const
MatrixPtr
&
bias
=
hasBias
?
biases_
->
getW
()
:
nullptr
;
const
MatrixPtr
&
out
=
output_
.
value
;
const
MatrixPtr
&
wgt
Val
=
weight_
->
getW
();
const
MatrixPtr
&
bias
Val
=
hasBias
?
biases_
->
getW
()
:
nullptr
;
const
MatrixPtr
&
out
Val
=
output_
.
value
;
if
(
inputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
in
=
getInputValue
(
0
);
in
Val_
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
in
);
CHECK
(
in
Val_
)
<<
"Input should be MKLDNNMatrix"
;
const
MatrixPtr
&
in
Val
=
getInputValue
(
0
);
in
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
inVal
);
CHECK
(
in
)
<<
"Input should be MKLDNNMatrix"
;
}
else
{
CHECK_EQ
(
getPrev
(
0
)
->
getDeviceId
(),
CPU_DEVICE
)
<<
"Only support CPU yet"
;
const
MatrixPtr
&
in
=
getInputValue
(
0
,
CPU_DEVICE
);
in
Val_
=
MKLDNNMatrix
::
create
(
in
,
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
const
MatrixPtr
&
in
Val
=
getInputValue
(
0
,
CPU_DEVICE
);
in
=
MKLDNNMatrix
::
create
(
in
Val
,
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
}
in
Val_
->
downSpatial
();
wgt
Val_
=
MKLDNNMatrix
::
create
(
wgt
,
memory
::
dims
{
oc_
,
ic_
,
ih_
,
iw_
},
format
::
oihw
,
engine_
);
wgt
Val_
->
downSpatial
();
bias
Val_
=
hasBias
?
MKLDNNMatrix
::
create
(
bias
,
{
oc_
},
format
::
x
,
engine_
)
:
nullptr
;
out
Val_
=
MKLDNNMatrix
::
create
(
out
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
in
->
downSpatial
();
wgt
=
MKLDNNMatrix
::
create
(
wgt
Val
,
memory
::
dims
{
oc_
,
ic_
,
ih_
,
iw_
},
format
::
oihw
,
engine_
);
wgt
->
downSpatial
();
bias
=
hasBias
?
MKLDNNMatrix
::
create
(
biasVal
,
{
oc_
},
format
::
x
,
engine_
)
:
nullptr
;
out
=
MKLDNNMatrix
::
create
(
outVal
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
// change original output value to mkldnn output value
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
Val_
);
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
if
(
!
outputIsOnlyMKLDNN
())
{
// fc cpu output value do not need create convert
// just share point
...
...
@@ -127,27 +133,31 @@ void MKLDNNFcLayer::resetFwd() {
// create forward handle
prop_kind
pk
=
prop_kind
::
forward
;
fc_fwd
::
desc
fwdDesc
=
hasBias
?
fc_fwd
::
desc
(
pk
,
in
Val_
->
getMemoryDesc
(),
wgt
Val_
->
getMemoryDesc
(),
bias
Val_
->
getMemoryDesc
(),
out
Val_
->
getMemoryDesc
())
in
->
getMemoryDesc
(),
wgt
->
getMemoryDesc
(),
bias
->
getMemoryDesc
(),
out
->
getMemoryDesc
())
:
fc_fwd
::
desc
(
pk
,
in
Val_
->
getMemoryDesc
(),
wgt
Val_
->
getMemoryDesc
(),
out
Val_
->
getMemoryDesc
());
in
->
getMemoryDesc
(),
wgt
->
getMemoryDesc
(),
out
->
getMemoryDesc
());
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
if
(
hasBias
)
{
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
in
Val_
,
*
wgtVal_
,
*
biasVal_
,
*
outVal_
));
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
in
,
*
wgt
,
*
bias
,
*
out
));
}
else
{
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
in
Val_
,
*
wgtVal_
,
*
outVal_
));
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
in
,
*
wgt
,
*
out
));
}
printValueFormatFlow
();
pipelineFwd_
.
clear
();
pipelineFwd_
.
push_back
(
*
fwd_
);
pipeline
.
push_back
(
*
fwd_
);
}
void
MKLDNNFcLayer
::
resetBwd
()
{
void
MKLDNNFcLayer
::
resetBwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
if
(
!
needResetBwd_
)
{
return
;
}
...
...
@@ -156,8 +166,8 @@ void MKLDNNFcLayer::resetBwd() {
/// backward weight
CHECK
(
inVal_
)
<<
"Should have input value"
;
const
MatrixPtr
&
wgt
=
weight_
->
getWGrad
();
const
MatrixPtr
&
bias
=
hasBias
?
biases_
->
getWGrad
()
:
nullptr
;
const
MatrixPtr
&
wgt
Grad
=
weight_
->
getWGrad
();
const
MatrixPtr
&
bias
Grad
=
hasBias
?
biases_
->
getWGrad
()
:
nullptr
;
// TODO(TJ): merge outgrad
int
device
=
outputIsOnlyMKLDNN
()
?
MKLDNN_DEVICE
:
CPU_DEVICE
;
...
...
@@ -168,59 +178,56 @@ void MKLDNNFcLayer::resetBwd() {
// for CPU device:
// fc do not need to convert from cpu device since output is always nc format
// only need create from cpu device
const
MatrixPtr
&
out
=
getOutput
(
device
).
grad
;
out
Grad_
=
MKLDNNMatrix
::
create
(
out
,
outVal_
->
getPrimitiveDesc
());
wgt
Grad_
=
MKLDNNMatrix
::
create
(
wgt
,
wgtVal_
->
getPrimitiveDesc
());
bias
Grad_
=
hasBias
?
MKLDNNMatrix
::
create
(
bias
,
biasVal_
->
getPrimitiveDesc
())
:
nullptr
;
const
MatrixPtr
&
out
Grad
=
getOutput
(
device
).
grad
;
out
=
MKLDNNMatrix
::
create
(
outGrad
,
outVal_
->
getPrimitiveDesc
());
wgt
=
MKLDNNMatrix
::
create
(
wgtGrad
,
wgtVal_
->
getPrimitiveDesc
());
bias
=
hasBias
?
MKLDNNMatrix
::
create
(
biasGrad
,
biasVal_
->
getPrimitiveDesc
())
:
nullptr
;
// create memory primitive desc
fc_fwd
::
desc
fwdDesc
=
fc_fwd
::
desc
(
prop_kind
::
forward
,
inVal_
->
getMemoryDesc
(),
wgt
Grad_
->
getMemoryDesc
(),
out
Grad_
->
getMemoryDesc
());
wgt
->
getMemoryDesc
(),
out
->
getMemoryDesc
());
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
fc_bwdWgt
::
desc
bwdWgtDesc
=
hasBias
?
fc_bwdWgt
::
desc
(
inVal_
->
getMemoryDesc
(),
wgt
Grad_
->
getMemoryDesc
(),
bias
Grad_
->
getMemoryDesc
(),
out
Grad_
->
getMemoryDesc
())
wgt
->
getMemoryDesc
(),
bias
->
getMemoryDesc
(),
out
->
getMemoryDesc
())
:
fc_bwdWgt
::
desc
(
inVal_
->
getMemoryDesc
(),
wgt
Grad_
->
getMemoryDesc
(),
out
Grad_
->
getMemoryDesc
());
wgt
->
getMemoryDesc
(),
out
->
getMemoryDesc
());
fc_bwdWgt
::
primitive_desc
bwdWgtPD
=
fc_bwdWgt
::
primitive_desc
(
bwdWgtDesc
,
engine_
,
fwdPD
);
if
(
hasBias
)
{
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
bwdWgtPD
,
*
inVal_
,
*
outGrad_
,
*
wgtGrad_
,
*
biasGrad_
));
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
bwdWgtPD
,
*
inVal_
,
*
out
,
*
wgt
,
*
bias
));
}
else
{
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
bwdWgtPD
,
*
inVal_
,
*
out
Grad_
,
*
wgtGrad_
));
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
bwdWgtPD
,
*
inVal_
,
*
out
,
*
wgt
));
}
pipelineBwd_
.
clear
();
pipelineBwd_
.
push_back
(
*
bwdWgt_
);
pipeline
.
push_back
(
*
bwdWgt_
);
/// backward data
const
MatrixPtr
&
in
=
inputLayers_
[
0
]
->
getOutput
().
grad
;
if
(
in
==
nullptr
)
{
const
MatrixPtr
&
in
Grad
=
inputLayers_
[
0
]
->
getOutput
().
grad
;
if
(
in
Grad
==
nullptr
)
{
return
;
}
if
(
getInput
(
0
,
MKLDNN_DEVICE
).
getAllCount
()
>
1
)
{
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
}
else
{
in
Grad_
=
MKLDNNMatrix
::
create
(
in
,
inVal_
->
getPrimitiveDesc
());
in
=
MKLDNNMatrix
::
create
(
inGrad
,
inVal_
->
getPrimitiveDesc
());
}
fc_bwdData
::
desc
bwdDataDesc
=
fc_bwdData
::
desc
(
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
());
fc_bwdData
::
desc
bwdDataDesc
=
fc_bwdData
::
desc
(
inVal_
->
getMemoryDesc
(),
wgt
->
getMemoryDesc
(),
out
->
getMemoryDesc
());
fc_bwdData
::
primitive_desc
bwdDataPD
=
fc_bwdData
::
primitive_desc
(
bwdDataDesc
,
engine_
,
fwdPD
);
CHECK
(
wgtVal_
)
<<
"Should have weight memory"
;
bwdData_
.
reset
(
new
fc_bwdData
(
bwdDataPD
,
*
out
Grad_
,
*
wgtVal_
,
*
inGrad_
));
bwdData_
.
reset
(
new
fc_bwdData
(
bwdDataPD
,
*
out
,
*
wgtVal_
,
*
in
));
printGradFormatFlow
();
pipeline
Bwd_
.
push_back
(
*
bwdData_
);
pipeline
.
push_back
(
*
bwdData_
);
}
void
MKLDNNFcLayer
::
updateInputData
()
{
...
...
paddle/gserver/layers/MKLDNNFcLayer.h
浏览文件 @
4f0869be
...
...
@@ -45,11 +45,20 @@ public:
bool
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
override
;
void
reshape
()
override
;
void
resetFwd
()
override
;
void
resetBwd
()
override
;
void
reshape
(
int
&
bs
,
int
&
ic
,
int
&
ih
,
int
&
iw
,
int
oc
,
int
&
oh
,
int
&
ow
)
override
;
void
resetFwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
resetBwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
updateInputData
()
override
;
...
...
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
4f0869be
...
...
@@ -111,13 +111,14 @@ public:
{
REGISTER_TIMER_INFO
(
"mkldnn_FwdTimer"
,
getName
().
c_str
());
copySeqInfoToOutputs
();
CHECK
(
!
inputLayers_
.
empty
());
copySeqInfoToOutputs
();
size_t
elemenCnt
=
inputLayers_
[
0
]
->
getOutput
().
value
->
getElementCnt
();
if
(
inputElemenCnt_
!=
elemenCnt
)
{
// reset when input total sizes changed, not only the batchsize
inputElemenCnt_
=
elemenCnt
;
reshape
();
resetFwd
();
reshape
(
bs_
,
ic_
,
ih_
,
iw_
,
oc_
,
oh_
,
ow_
);
resetFwd
(
pipelineFwd_
,
inVal_
,
wgtVal_
,
biasVal_
,
outVal_
);
convertWeightsFromPaddle
();
needResetBwd_
=
true
;
}
...
...
@@ -144,7 +145,7 @@ public:
{
REGISTER_TIMER_INFO
(
"mkldnn_bwdTimer"
,
getName
().
c_str
());
if
(
needResetBwd_
)
{
resetBwd
();
resetBwd
(
pipelineBwd_
,
inGrad_
,
wgtGrad_
,
biasGrad_
,
outGrad_
);
needResetBwd_
=
false
;
}
...
...
@@ -160,20 +161,30 @@ public:
/**
* reshape the input image sizes
* and reset output image and buffer size
* output channel can not be changed
*/
virtual
void
reshape
()
=
0
;
virtual
void
reshape
(
int
&
bs
,
int
&
ic
,
int
&
ih
,
int
&
iw
,
int
oc
,
int
&
oh
,
int
&
ow
)
=
0
;
/**
* reset the mkldnn forward primitve and memory
* only would be called when input size changes
*/
virtual
void
resetFwd
()
=
0
;
virtual
void
resetFwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
=
0
;
/**
* reset the mkldnn backward primitve and memory for mkldnn fc
* only would be called when needed
*/
virtual
void
resetBwd
()
=
0
;
virtual
void
resetBwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
=
0
;
/**
* Update input value data when input layer is "data" type.
...
...
@@ -207,16 +218,16 @@ protected:
/**
* reshape the input image sizes and input batchsize
*/
virtual
void
reshapeInput
()
{
virtual
void
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
)
{
const
Argument
&
input
=
inputLayers_
[
0
]
->
getOutput
();
b
s_
=
input
.
getBatchSize
();
int
h
eight
=
input
.
getFrameHeight
();
int
w
idth
=
input
.
getFrameWidth
();
if
(
h
eight
!=
0
)
{
ih_
=
height
;
b
atchsize
=
input
.
getBatchSize
();
int
h
=
input
.
getFrameHeight
();
int
w
=
input
.
getFrameWidth
();
if
(
h
!=
0
)
{
height
=
h
;
}
if
(
w
idth
!=
0
)
{
iw_
=
width
;
if
(
w
!=
0
)
{
width
=
w
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录