Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
171fee2c
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
171fee2c
编写于
9月 12, 2017
作者:
T
Tao Luo
提交者:
GitHub
9月 12, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4011 from tensor-tang/refine
Refine MKLDNNLayer and MKLDNNTester
上级
6d0d29f6
7f7fa325
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
325 addition
and
230 deletion
+325
-230
paddle/gserver/layers/Layer.h
paddle/gserver/layers/Layer.h
+6
-5
paddle/gserver/layers/MKLDNNFcLayer.cpp
paddle/gserver/layers/MKLDNNFcLayer.cpp
+81
-131
paddle/gserver/layers/MKLDNNFcLayer.h
paddle/gserver/layers/MKLDNNFcLayer.h
+17
-24
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+169
-32
paddle/gserver/tests/MKLDNNTester.cpp
paddle/gserver/tests/MKLDNNTester.cpp
+45
-33
paddle/gserver/tests/MKLDNNTester.h
paddle/gserver/tests/MKLDNNTester.h
+7
-5
未找到文件。
paddle/gserver/layers/Layer.h
浏览文件 @
171fee2c
...
...
@@ -49,6 +49,12 @@ struct LayerState {
};
typedef
std
::
shared_ptr
<
LayerState
>
LayerStatePtr
;
/// Paddle device ID, MKLDNN is -2, CPU is -1
enum
PADDLE_DEVICE_ID
{
MKLDNN_DEVICE
=
-
2
,
CPU_DEVICE
=
-
1
,
};
/**
* @brief Base class for layer.
* Define necessary variables and functions for every layer.
...
...
@@ -59,11 +65,6 @@ protected:
LayerConfig
config_
;
/// whether to use GPU
bool
useGpu_
;
/// Paddle device ID, MKLDNN is -2, CPU is -1
enum
PADDLE_DEVICE_ID
{
MKLDNN_DEVICE
=
-
2
,
CPU_DEVICE
=
-
1
,
};
/// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ...
int
deviceId_
;
/// Input layers
...
...
paddle/gserver/layers/MKLDNNFcLayer.cpp
浏览文件 @
171fee2c
...
...
@@ -14,7 +14,6 @@ limitations under the License. */
#include "MKLDNNFcLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
using
namespace
mkldnn
;
// NOLINT
typedef
memory
::
format
format
;
...
...
@@ -40,6 +39,8 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap,
oc_
=
getSize
();
oh_
=
1
;
ow_
=
1
;
ih_
=
1
;
iw_
=
1
;
// input size can not change in FC
iLayerSize_
=
inputLayers_
[
0
]
->
getSize
();
...
...
@@ -77,67 +78,53 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
wgtVal_
->
reorderDataTo
(
wgtVal_
,
dstFmt
,
targetDim
);
}
void
MKLDNNFcLayer
::
reshape
()
{
const
Argument
&
input
=
getInput
(
0
,
getPrev
(
0
)
->
getDeviceId
());
int
batchSize
=
input
.
getBatchSize
();
if
(
bs_
==
batchSize
)
{
return
;
}
bs_
=
batchSize
;
ih_
=
input
.
getFrameHeight
();
iw_
=
input
.
getFrameWidth
();
if
(
ih_
==
0
)
{
ih_
=
1
;
}
if
(
iw_
==
0
)
{
iw_
=
1
;
}
CHECK_EQ
(
iLayerSize_
,
inputLayers_
[
0
]
->
getSize
());
ic_
=
iLayerSize_
/
(
ih_
*
iw_
);
CHECK_EQ
(
size_t
(
ic_
*
ih_
*
iw_
),
iLayerSize_
)
<<
"not divisible"
;
CHECK_EQ
(
size_t
(
oc_
),
getSize
());
printSizeInfo
();
void
MKLDNNFcLayer
::
reshape
(
int
&
bs
,
int
&
ic
,
int
&
ih
,
int
&
iw
,
int
oc
,
int
&
oh
,
int
&
ow
)
{
reshapeInput
(
bs
,
ih
,
iw
);
// reset output
output_
.
setFrameHeight
(
oh_
);
output_
.
setFrameWidth
(
ow_
)
;
resetOutput
(
bs_
,
oc_
);
CHECK_EQ
(
iLayerSize_
,
inputLayers_
[
0
]
->
getSize
());
ic
=
iLayerSize_
/
(
ih
*
iw
);
CHECK_EQ
(
size_t
(
ic
*
ih
*
iw
),
iLayerSize_
)
<<
"not divisible"
;
CHECK_EQ
(
size_t
(
oc
),
getSize
()
);
// reset mkldnn forward
resetFwd
();
needResetBwd_
=
true
;
reshapeOutput
(
oh
,
ow
);
resizeOutput
(
bs
,
oc
);
convertWeightsFromPaddle
();
printSizeInfo
();
}
void
MKLDNNFcLayer
::
resetFwd
()
{
void
MKLDNNFcLayer
::
resetFwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
bool
hasBias
=
biases_
&&
biases_
->
getW
();
const
MatrixPtr
&
wgt
=
weight_
->
getW
();
const
MatrixPtr
&
bias
=
hasBias
?
biases_
->
getW
()
:
nullptr
;
const
MatrixPtr
&
out
=
output_
.
value
;
const
MatrixPtr
&
wgt
Val
=
weight_
->
getW
();
const
MatrixPtr
&
bias
Val
=
hasBias
?
biases_
->
getW
()
:
nullptr
;
const
MatrixPtr
&
out
Val
=
output_
.
value
;
if
(
inputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
in
=
getInputValue
(
0
);
in
Val_
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
in
);
CHECK
(
in
Val_
)
<<
"Input should be MKLDNNMatrix"
;
const
MatrixPtr
&
in
Val
=
getInputValue
(
0
);
in
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
inVal
);
CHECK
(
in
)
<<
"Input should be MKLDNNMatrix"
;
}
else
{
CHECK_EQ
(
getPrev
(
0
)
->
getDeviceId
(),
CPU_DEVICE
)
<<
"Only support CPU yet"
;
const
MatrixPtr
&
in
=
getInputValue
(
0
,
CPU_DEVICE
);
in
Val_
=
MKLDNNMatrix
::
create
(
in
,
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
}
in
Val_
->
downSpatial
();
wgt
Val_
=
MKLDNNMatrix
::
create
(
wgt
,
memory
::
dims
{
oc_
,
ic_
,
ih_
,
iw_
},
format
::
oihw
,
engine_
);
wgt
Val_
->
downSpatial
();
bias
Val_
=
hasBias
?
MKLDNNMatrix
::
create
(
bias
,
{
oc_
},
format
::
x
,
engine_
)
:
nullptr
;
out
Val_
=
MKLDNNMatrix
::
create
(
out
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
const
MatrixPtr
&
in
Val
=
getInputValue
(
0
,
CPU_DEVICE
);
in
=
MKLDNNMatrix
::
create
(
in
Val
,
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
}
in
->
downSpatial
();
wgt
=
MKLDNNMatrix
::
create
(
wgt
Val
,
memory
::
dims
{
oc_
,
ic_
,
ih_
,
iw_
},
format
::
oihw
,
engine_
);
wgt
->
downSpatial
();
bias
=
hasBias
?
MKLDNNMatrix
::
create
(
biasVal
,
{
oc_
},
format
::
x
,
engine_
)
:
nullptr
;
out
=
MKLDNNMatrix
::
create
(
outVal
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
// change original output value to mkldnn output value
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
Val_
);
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
if
(
!
outputIsOnlyMKLDNN
())
{
copyOutputInfoToOtherDevice
();
// the fc cpu output value does not need a conversion to be created,
// it just shares the data pointer
getOutput
(
CPU_DEVICE
).
value
->
setData
(
output_
.
value
->
getData
());
...
...
@@ -146,27 +133,31 @@ void MKLDNNFcLayer::resetFwd() {
// create forward handle
prop_kind
pk
=
prop_kind
::
forward
;
fc_fwd
::
desc
fwdDesc
=
hasBias
?
fc_fwd
::
desc
(
pk
,
in
Val_
->
getMemoryDesc
(),
wgt
Val_
->
getMemoryDesc
(),
bias
Val_
->
getMemoryDesc
(),
out
Val_
->
getMemoryDesc
())
in
->
getMemoryDesc
(),
wgt
->
getMemoryDesc
(),
bias
->
getMemoryDesc
(),
out
->
getMemoryDesc
())
:
fc_fwd
::
desc
(
pk
,
in
Val_
->
getMemoryDesc
(),
wgt
Val_
->
getMemoryDesc
(),
out
Val_
->
getMemoryDesc
());
in
->
getMemoryDesc
(),
wgt
->
getMemoryDesc
(),
out
->
getMemoryDesc
());
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
if
(
hasBias
)
{
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
in
Val_
,
*
wgtVal_
,
*
biasVal_
,
*
outVal_
));
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
in
,
*
wgt
,
*
bias
,
*
out
));
}
else
{
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
in
Val_
,
*
wgtVal_
,
*
outVal_
));
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
in
,
*
wgt
,
*
out
));
}
printValueFormatFlow
();
pipelineFwd_
.
clear
();
pipelineFwd_
.
push_back
(
*
fwd_
);
pipeline
.
push_back
(
*
fwd_
);
}
void
MKLDNNFcLayer
::
resetBwd
()
{
void
MKLDNNFcLayer
::
resetBwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
pipeline
.
clear
();
if
(
!
needResetBwd_
)
{
return
;
}
...
...
@@ -175,8 +166,8 @@ void MKLDNNFcLayer::resetBwd() {
/// backward weight
CHECK
(
inVal_
)
<<
"Should have input value"
;
const
MatrixPtr
&
wgt
=
weight_
->
getWGrad
();
const
MatrixPtr
&
bias
=
hasBias
?
biases_
->
getWGrad
()
:
nullptr
;
const
MatrixPtr
&
wgt
Grad
=
weight_
->
getWGrad
();
const
MatrixPtr
&
bias
Grad
=
hasBias
?
biases_
->
getWGrad
()
:
nullptr
;
// TODO(TJ): merge outgrad
int
device
=
outputIsOnlyMKLDNN
()
?
MKLDNN_DEVICE
:
CPU_DEVICE
;
...
...
@@ -187,107 +178,66 @@ void MKLDNNFcLayer::resetBwd() {
// for CPU device:
// fc does not need to convert from the cpu device since the output is always in nc format,
// it only needs to be created from the cpu device
const
MatrixPtr
&
out
=
getOutput
(
device
).
grad
;
out
Grad_
=
MKLDNNMatrix
::
create
(
out
,
outVal_
->
getPrimitiveDesc
());
wgt
Grad_
=
MKLDNNMatrix
::
create
(
wgt
,
wgtVal_
->
getPrimitiveDesc
());
bias
Grad_
=
hasBias
?
MKLDNNMatrix
::
create
(
bias
,
biasVal_
->
getPrimitiveDesc
())
:
nullptr
;
const
MatrixPtr
&
out
Grad
=
getOutput
(
device
).
grad
;
out
=
MKLDNNMatrix
::
create
(
outGrad
,
outVal_
->
getPrimitiveDesc
());
wgt
=
MKLDNNMatrix
::
create
(
wgtGrad
,
wgtVal_
->
getPrimitiveDesc
());
bias
=
hasBias
?
MKLDNNMatrix
::
create
(
biasGrad
,
biasVal_
->
getPrimitiveDesc
())
:
nullptr
;
// create memory primitive desc
fc_fwd
::
desc
fwdDesc
=
fc_fwd
::
desc
(
prop_kind
::
forward
,
inVal_
->
getMemoryDesc
(),
wgt
Grad_
->
getMemoryDesc
(),
out
Grad_
->
getMemoryDesc
());
wgt
->
getMemoryDesc
(),
out
->
getMemoryDesc
());
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
fc_bwdWgt
::
desc
bwdWgtDesc
=
hasBias
?
fc_bwdWgt
::
desc
(
inVal_
->
getMemoryDesc
(),
wgt
Grad_
->
getMemoryDesc
(),
bias
Grad_
->
getMemoryDesc
(),
out
Grad_
->
getMemoryDesc
())
wgt
->
getMemoryDesc
(),
bias
->
getMemoryDesc
(),
out
->
getMemoryDesc
())
:
fc_bwdWgt
::
desc
(
inVal_
->
getMemoryDesc
(),
wgt
Grad_
->
getMemoryDesc
(),
out
Grad_
->
getMemoryDesc
());
wgt
->
getMemoryDesc
(),
out
->
getMemoryDesc
());
fc_bwdWgt
::
primitive_desc
bwdWgtPD
=
fc_bwdWgt
::
primitive_desc
(
bwdWgtDesc
,
engine_
,
fwdPD
);
if
(
hasBias
)
{
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
bwdWgtPD
,
*
inVal_
,
*
outGrad_
,
*
wgtGrad_
,
*
biasGrad_
));
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
bwdWgtPD
,
*
inVal_
,
*
out
,
*
wgt
,
*
bias
));
}
else
{
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
bwdWgtPD
,
*
inVal_
,
*
out
Grad_
,
*
wgtGrad_
));
bwdWgt_
.
reset
(
new
fc_bwdWgt
(
bwdWgtPD
,
*
inVal_
,
*
out
,
*
wgt
));
}
pipelineBwd_
.
clear
();
pipelineBwd_
.
push_back
(
*
bwdWgt_
);
pipeline
.
push_back
(
*
bwdWgt_
);
/// backward data
const
MatrixPtr
&
in
=
inputLayers_
[
0
]
->
getOutput
().
grad
;
if
(
in
==
nullptr
)
{
const
MatrixPtr
&
in
Grad
=
inputLayers_
[
0
]
->
getOutput
().
grad
;
if
(
in
Grad
==
nullptr
)
{
return
;
}
if
(
getInput
(
0
,
MKLDNN_DEVICE
).
getAllCount
()
>
1
)
{
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
}
else
{
in
Grad_
=
MKLDNNMatrix
::
create
(
in
,
inVal_
->
getPrimitiveDesc
());
in
=
MKLDNNMatrix
::
create
(
inGrad
,
inVal_
->
getPrimitiveDesc
());
}
fc_bwdData
::
desc
bwdDataDesc
=
fc_bwdData
::
desc
(
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
());
fc_bwdData
::
desc
bwdDataDesc
=
fc_bwdData
::
desc
(
inVal_
->
getMemoryDesc
(),
wgt
->
getMemoryDesc
(),
out
->
getMemoryDesc
());
fc_bwdData
::
primitive_desc
bwdDataPD
=
fc_bwdData
::
primitive_desc
(
bwdDataDesc
,
engine_
,
fwdPD
);
CHECK
(
wgtVal_
)
<<
"Should have weight memory"
;
bwdData_
.
reset
(
new
fc_bwdData
(
bwdDataPD
,
*
out
Grad_
,
*
wgtVal_
,
*
inGrad_
));
bwdData_
.
reset
(
new
fc_bwdData
(
bwdDataPD
,
*
out
,
*
wgtVal_
,
*
in
));
printGradFormatFlow
();
pipeline
Bwd_
.
push_back
(
*
bwdData_
);
pipeline
.
push_back
(
*
bwdData_
);
}
void
MKLDNNFcLayer
::
updateInputData
()
{
if
(
inputLayers_
[
0
]
->
getType
()
!=
"data"
)
{
return
;
}
real
*
iData
=
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
();
inVal_
->
setData
(
iData
);
inVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
void
MKLDNNFcLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
reshape
();
{
REGISTER_TIMER_INFO
(
"mkldnn_FwdTimer"
,
getName
().
c_str
());
updateInputData
();
// just submit forward pipeline
stream_
->
submit
(
pipelineFwd_
);
}
/* activation */
{
REGISTER_TIMER_INFO
(
"FwActTimer"
,
getName
().
c_str
());
forwardActivation
();
}
}
void
MKLDNNFcLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
/* Do derivation */
{
REGISTER_TIMER_INFO
(
"BpActTimer"
,
getName
().
c_str
());
backwardActivation
();
}
{
REGISTER_TIMER_INFO
(
"mkldnn_bwdTimer"
,
getName
().
c_str
());
resetBwd
();
// just submit backward pipeline
stream_
->
submit
(
pipelineBwd_
);
}
{
REGISTER_TIMER_INFO
(
"WeightUpdate"
,
getName
().
c_str
());
weight_
->
getParameterPtr
()
->
incUpdate
(
callback
);
if
(
biases_
&&
biases_
->
getWGrad
())
{
biases_
->
getParameterPtr
()
->
incUpdate
(
callback
);
}
/**
 * Apply the incremental update callback to the parameters of this layer.
 *
 * The weight parameter is always updated; the bias parameter is updated only
 * when a bias exists and it owns a gradient buffer.
 *
 * @param callback update callback forwarded to Parameter::incUpdate.
 */
void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) {
  weight_->getParameterPtr()->incUpdate(callback);

  // evaluated after the weight update, exactly as in the original order
  const bool biasHasGrad = biases_ && biases_->getWGrad();
  if (!biasHasGrad) {
    return;
  }
  biases_->getParameterPtr()->incUpdate(callback);
}
}
// namespace paddle
paddle/gserver/layers/MKLDNNFcLayer.h
浏览文件 @
171fee2c
...
...
@@ -45,35 +45,28 @@ public:
bool
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
override
;
void
convertWeightsFromPaddle
()
override
;
void
convertWeightsToPaddle
()
override
;
void
reshape
(
int
&
bs
,
int
&
ic
,
int
&
ih
,
int
&
iw
,
int
oc
,
int
&
oh
,
int
&
ow
)
override
;
void
forward
(
PassType
passType
)
override
;
void
resetFwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
backward
(
const
UpdateCallback
&
callback
)
override
;
void
resetBwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
updateInputData
()
override
;
protected:
/**
* reshape the input image sizes
* and reset output buffer size
* and reset mkldnn forward
*/
void
reshape
();
/**
* reset the forward primitive and memory
* only would be called when input size changes
*/
void
resetFwd
();
/**
* reset the backward primitive and memory for mkldnn fc
* only would be called when needed
*/
void
resetBwd
();
void
updateWeights
(
const
UpdateCallback
&
callback
)
override
;
void
convertWeightsFromPaddle
()
override
;
void
convertWeightsToPaddle
()
override
;
};
}
// namespace paddle
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
171fee2c
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include "MKLDNNBase.h"
#include "mkldnn.hpp"
#include "paddle/math/MKLDNNMatrix.h"
#include "paddle/utils/Stat.h"
DECLARE_bool
(
use_mkldnn
);
...
...
@@ -33,6 +34,8 @@ typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr;
*/
class
MKLDNNLayer
:
public
Layer
{
protected:
// input value element count
size_t
inputElemenCnt_
;
// batch size
int
bs_
;
// input image channel, height and width
...
...
@@ -52,7 +55,7 @@ protected:
std
::
vector
<
mkldnn
::
primitive
>
pipelineFwd_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineBwd_
;
// MKLDNNMatrixPtr
// MKLDNNMatrixPtr
with internal format
MKLDNNMatrixPtr
inVal_
;
MKLDNNMatrixPtr
inGrad_
;
MKLDNNMatrixPtr
outVal_
;
...
...
@@ -65,6 +68,7 @@ protected:
public:
explicit
MKLDNNLayer
(
const
LayerConfig
&
config
)
:
Layer
(
config
),
inputElemenCnt_
(
0
),
bs_
(
0
),
ic_
(
0
),
ih_
(
0
),
...
...
@@ -95,12 +99,104 @@ public:
if
(
!
Layer
::
init
(
layerMap
,
parameterMap
))
{
return
false
;
}
checkCPUOutputsNumber
();
stream_
.
reset
(
new
MKLDNNStream
());
engine_
=
CPUEngine
::
Instance
().
getEngine
();
return
true
;
}
/**
 * Forward pass shared by the MKLDNN layers.
 *
 * Stores the pass type, copies the sequence info of the first input to the
 * outputs and, whenever the element count of the first input's value differs
 * from the cached inputElemenCnt_, reshapes the layer and recreates the
 * forward primitives. The forward pipeline is then submitted to stream_ and
 * the activation is applied afterwards.
 *
 * @param passType pass type, stored in passType_.
 */
void forward(PassType passType) override {
  passType_ = passType;

  {
    REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
    CHECK(!inputLayers_.empty());
    copySeqInfoToOutputs();

    const size_t curElemCnt =
        inputLayers_[0]->getOutput().value->getElementCnt();
    const bool inputSizeChanged = (curElemCnt != inputElemenCnt_);
    if (inputSizeChanged) {
      // reset when the total input size changed, not only the batch size
      inputElemenCnt_ = curElemCnt;
      reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
      resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
      convertWeightsFromPaddle();
      // request a rebuild of the backward primitives as well
      needResetBwd_ = true;
    }

    const bool fedByDataLayer = (inputLayers_[0]->getType() == "data");
    if (fedByDataLayer) {
      updateInputData();
    }

    stream_->submit(pipelineFwd_);
  }

  /* activation */
  {
    REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
    forwardActivation();
  }
}
/**
 * Backward pass shared by the MKLDNN layers.
 *
 * Runs the activation backward, recreates the backward primitives when
 * needResetBwd_ is set, submits the backward pipeline to stream_ and finally
 * lets the layer update its parameters through updateWeights().
 *
 * @param callback update callback handed to updateWeights().
 */
void backward(const UpdateCallback& callback) override {
  // step 1: derivative of the activation
  {
    REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
    backwardActivation();
  }

  // step 2: (re)build the backward primitives if requested, then run them
  {
    REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
    if (needResetBwd_) {
      resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
      // cleared only after resetBwd() has returned
      needResetBwd_ = false;
    }
    stream_->submit(pipelineBwd_);
  }

  // step 3: parameter update
  {
    REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
    updateWeights(callback);
  }
}
/**
* reshape the input image sizes
* and reset output image and buffer size
* output channel can not be changed
*/
virtual
void
reshape
(
int
&
bs
,
int
&
ic
,
int
&
ih
,
int
&
iw
,
int
oc
,
int
&
oh
,
int
&
ow
)
=
0
;
/**
* reset the mkldnn forward primitive and memory
* only would be called when input size changes
*/
virtual
void
resetFwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
=
0
;
/**
* reset the mkldnn backward primitive and memory for mkldnn fc
* only would be called when needed
*/
virtual
void
resetBwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
=
0
;
/**
* Update input value data when input layer is "data" type.
* Since the input value data address might be changed.
*/
virtual
void
updateInputData
()
{}
/**
* Update weights and biases if necessary.
*/
virtual
void
updateWeights
(
const
UpdateCallback
&
callback
)
{}
/**
* convert weight from paddle format to mkldnn format
* weight_ will be overridden
...
...
@@ -114,10 +210,38 @@ public:
virtual
void
convertWeightsToPaddle
()
{}
/**
* Update input value data when input layer is "data" type.
* Since the input value data address might be changed.
* add this interface as public for unit test
*/
virtual
void
updateInputData
()
{}
void addOutputArgument(int deviceId) {
  // Public forwarder to the base-class implementation, so that code holding
  // an MKLDNNLayer (e.g. the unit test) can add an output for the given
  // device id.
  Layer::addOutputArgument(deviceId);
}
protected:
/**
* reshape the input image sizes and input batchsize
*/
virtual
void
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
)
{
const
Argument
&
input
=
inputLayers_
[
0
]
->
getOutput
();
batchsize
=
input
.
getBatchSize
();
int
h
=
input
.
getFrameHeight
();
int
w
=
input
.
getFrameWidth
();
if
(
h
!=
0
)
{
height
=
h
;
}
if
(
w
!=
0
)
{
width
=
w
;
}
}
/**
* reshape output image sizes
*/
virtual void reshapeOutput(size_t height, size_t width) {
  // the output of this layer's own device
  output_.setFrameHeight(height);
  output_.setFrameWidth(width);

  // every output kept for another device receives the same frame size
  for (auto& devOutput : outputOtherDevice_) {
    devOutput.setFrameHeight(height);
    devOutput.setFrameWidth(width);
  }
}
/**
* print info about sizes
...
...
@@ -133,8 +257,8 @@ public:
*/
virtual
void
printValueFormatFlow
()
{
if
(
inVal_
&&
outVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"value format flow --- "
<<
inVal_
->
getFormat
()
<<
" >>> "
<<
outVal_
->
getFormat
();
VLOG
(
MKLDNN_FMTS
)
<<
inVal_
->
getFormat
()
<<
" >>> "
<<
outVal_
->
getFormat
();
}
}
...
...
@@ -143,36 +267,12 @@ public:
*/
virtual
void
printGradFormatFlow
()
{
if
(
inGrad_
&&
outGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"grad format flow --- "
<<
inGrad_
->
getFormat
()
<<
" <<< "
<<
outGrad_
->
getFormat
();
VLOG
(
MKLDNN_FMTS
)
<<
inGrad_
->
getFormat
()
<<
" <<< "
<<
outGrad_
->
getFormat
();
}
}
protected:
/**
* copy image size and sequence info to other device
* @note: can not directly use Layer::copyOutputToOtherDevice since here only
* copy base info and do not copy data value
*/
void copyOutputInfoToOtherDevice() {
  int cpuOutputs = 0;
  for (auto& devOutput : outputOtherDevice_) {
    // frame sizes
    devOutput.setFrameHeight(output_.getFrameHeight());
    devOutput.setFrameWidth(output_.getFrameWidth());

    // sequence info
    devOutput.sequenceStartPositions = output_.sequenceStartPositions;
    devOutput.subSequenceStartPositions = output_.subSequenceStartPositions;
    devOutput.cpuSequenceDims = output_.cpuSequenceDims;

    // count the outputs that live on the CPU device
    cpuOutputs += (devOutput.deviceId == CPU_DEVICE) ? 1 : 0;
  }

  // more than one CPU output is only reported, not treated as fatal
  const bool tooManyCpuOutputs = (cpuOutputs > 1);
  if (tooManyCpuOutputs) {
    LOG(WARNING) << "should not have more than one CPU devie";
  }
}
/**
* If input only has MKLDNN device.
* Otherwise, only support the previous layer using CPU device.
...
...
@@ -205,6 +305,7 @@ protected:
*/
void
setDevice
(
int
id
)
{
deviceId_
=
id
;
}
private:
/**
* Set deviceId of the params used in this layer.
*/
...
...
@@ -228,6 +329,42 @@ protected:
parameter
->
setDevice
(
id
);
}
}
/**
* Check the cpu device number of outputOtherDevice_.
* should have only one at most.
*/
void checkCPUOutputsNumber(int max = 1) {
  // count the entries of outputOtherDevice_ whose device id is CPU_DEVICE
  int cpuOutputs = 0;
  for (const auto& devOutput : outputOtherDevice_) {
    cpuOutputs += (devOutput.deviceId == CPU_DEVICE) ? 1 : 0;
  }

  // CHECK_LE fails when the count exceeds the allowed maximum
  CHECK_LE(cpuOutputs, max) << "too much CPU devies";
}
/**
* copy SeqInfo from input layer to this output and other output devices.
* @note: do not use getInput(0) since it uses this layer's deviceId_,
* use "inputLayers_[0]->getOutput()" instead.
*/
void
copySeqInfoToOutputs
()
{
if
(
inputLayers_
.
empty
()
||
!
needSequenceInfo_
)
{
return
;
}
const
Argument
&
input
=
inputLayers_
[
0
]
->
getOutput
();
output_
.
sequenceStartPositions
=
input
.
sequenceStartPositions
;
output_
.
subSequenceStartPositions
=
input
.
subSequenceStartPositions
;
output_
.
cpuSequenceDims
=
input
.
cpuSequenceDims
;
for
(
size_t
i
=
0
;
i
<
outputOtherDevice_
.
size
();
i
++
)
{
outputOtherDevice_
[
i
].
sequenceStartPositions
=
output_
.
sequenceStartPositions
;
outputOtherDevice_
[
i
].
subSequenceStartPositions
=
output_
.
subSequenceStartPositions
;
outputOtherDevice_
[
i
].
cpuSequenceDims
=
output_
.
cpuSequenceDims
;
}
}
};
}
// namespace paddle
paddle/gserver/tests/MKLDNNTester.cpp
浏览文件 @
171fee2c
...
...
@@ -63,8 +63,12 @@ void MKLDNNTester::reset(const TestConfig& dnn,
initTestLayer
(
configs_
[
i
],
&
(
layerMaps_
[
i
]),
&
(
parameters_
[
i
]),
&
(
testLayers_
[
i
]));
}
dnnLayer_
=
testLayers_
[
DNN
];
refLayer_
=
testLayers_
[
REF
];
dnnLayer_
=
std
::
dynamic_pointer_cast
<
MKLDNNLayer
>
(
testLayers_
[
DNN
]);
CHECK
(
dnnLayer_
);
// for comparison with Paddle reference results,
// need manually add cpu device output for test
dnnLayer_
->
addOutputArgument
(
CPU_DEVICE
);
EXPECT_EQ
(
dataLayers_
[
DNN
].
size
(),
dataLayers_
[
REF
].
size
());
EXPECT_EQ
(
parameters_
[
DNN
].
size
(),
parameters_
[
REF
].
size
());
...
...
@@ -109,20 +113,22 @@ void MKLDNNTester::randomBotDatas() {
void
MKLDNNTester
::
randomTopDiffs
()
{
refLayer_
->
getOutputGrad
()
->
randomizeUniform
();
dnnLayer_
->
getOutputGrad
()
->
copyFrom
(
*
(
refLayer_
->
getOutputGrad
()));
VLOG
(
lvl_
)
<<
"Random dom Backward Input, TopDiff: "
;
dnnLayer_
->
getOutput
(
CPU_DEVICE
)
.
grad
->
copyFrom
(
*
(
refLayer_
->
getOutputGrad
()));
VLOG
(
lvl_
)
<<
"Random Backward Input, TopDiff: "
;
printMatrix
(
refLayer_
->
getOutputGrad
());
}
void
MKLDNNTester
::
checkForward
()
{
printTopDatas
();
double
delta
=
compareMatrix
(
testLayers_
[
DNN
]
->
getOutputValue
(),
testLayers_
[
REF
]
->
getOutputValue
());
VLOG
(
MKLDNN_ALL
)
<<
"Check Forward"
;
printTopDatas
();
double
delta
=
compareMatrix
(
dnnLayer_
->
getOutput
(
-
1
).
value
,
refLayer_
->
getOutputValue
());
EXPECT_LE
(
fabs
(
delta
),
eps_
);
}
void
MKLDNNTester
::
checkBackwardData
()
{
VLOG
(
MKLDNN_ALL
)
<<
"Check Backward Data"
;
// TODO(TJ): uncomment me when batch norm ready
// const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm";
for
(
size_t
i
=
0
;
i
<
dataLayers_
[
DNN
].
size
();
++
i
)
{
...
...
@@ -144,14 +150,12 @@ void MKLDNNTester::checkBackwardData() {
}
void
MKLDNNTester
::
checkBackwardWgts
()
{
VLOG
(
MKLDNN_ALL
)
<<
"Check Backward Weight"
;
CHECK_EQ
(
parameters_
[
DNN
].
size
(),
parameters_
[
REF
].
size
());
vector
<
VectorPtr
>
dnnWgts
;
// used to temporarily save mkldnn weights
saveWgt
(
parameters_
[
DNN
],
dnnWgts
);
const
MKLDNNLayerPtr
dnnlayer
=
std
::
dynamic_pointer_cast
<
MKLDNNLayer
>
(
dnnLayer_
);
CHECK
(
dnnlayer
);
dnnlayer
->
convertWeightsToPaddle
();
dnnLayer_
->
convertWeightsToPaddle
();
for
(
size_t
i
=
0
;
i
<
parameters_
[
DNN
].
size
();
++
i
)
{
const
VectorPtr
&
dnn
=
parameters_
[
DNN
][
i
]
->
getBuf
(
PARAMETER_VALUE
);
const
VectorPtr
&
ref
=
parameters_
[
REF
][
i
]
->
getBuf
(
PARAMETER_VALUE
);
...
...
@@ -189,38 +193,38 @@ void MKLDNNTester::restoreWgt(const vector<VectorPtr>& from,
}
// clear parameters grad
void
MKLDNNTester
::
clearWgtDiffs
()
{
void
MKLDNNTester
::
clearWgtDiffs
(
size_t
id
)
{
CHECK_LE
(
id
,
parameters_
.
size
());
for
(
size_t
n
=
0
;
n
<
parameters_
.
size
();
++
n
)
{
for
(
size_t
i
=
0
;
i
<
parameters_
[
n
].
size
();
++
i
)
{
const
VectorPtr
&
grad
=
parameters_
[
n
][
i
]
->
getBuf
(
PARAMETER_GRADIENT
);
if
(
grad
)
{
grad
->
zeroMem
();
if
(
id
==
n
||
id
==
parameters_
.
size
())
{
for
(
size_t
i
=
0
;
i
<
parameters_
[
n
].
size
();
++
i
)
{
const
VectorPtr
&
grad
=
parameters_
[
n
][
i
]
->
getBuf
(
PARAMETER_GRADIENT
);
if
(
grad
)
{
grad
->
zeroMem
();
}
}
}
}
}
void
MKLDNNTester
::
clearBotDiffs
()
{
// dnn and ref
void
MKLDNNTester
::
clearBotDiffs
(
size_t
id
)
{
CHECK_LE
(
id
,
dataLayers_
.
size
());
for
(
size_t
n
=
0
;
n
<
dataLayers_
.
size
();
++
n
)
{
// all inputs layers
for
(
size_t
i
=
0
;
i
<
dataLayers_
[
n
].
size
();
++
i
)
{
dataLayers_
[
n
][
i
]
->
getOutputGrad
()
->
zeroMem
();
if
(
id
==
n
||
id
==
dataLayers_
.
size
())
{
// clear inputs layers of this specific layer
for
(
size_t
i
=
0
;
i
<
dataLayers_
[
n
].
size
();
++
i
)
{
dataLayers_
[
n
][
i
]
->
getOutputGrad
()
->
zeroMem
();
}
}
}
}
void
MKLDNNTester
::
clearBotDiffs
(
int
n
)
{
CHECK_LT
(
n
,
NUM
);
// all inputs layers
for
(
size_t
i
=
0
;
i
<
dataLayers_
[
n
].
size
();
++
i
)
{
dataLayers_
[
n
][
i
]
->
getOutputGrad
()
->
zeroMem
();
}
}
void
MKLDNNTester
::
clearTopDatas
()
{
void
MKLDNNTester
::
clearTopDatas
(
size_t
id
)
{
CHECK_LE
(
id
,
testLayers_
.
size
());
for
(
size_t
i
=
0
;
i
<
testLayers_
.
size
();
++
i
)
{
testLayers_
[
i
]
->
getOutputValue
()
->
zeroMem
();
if
(
id
==
i
||
id
==
testLayers_
.
size
())
{
testLayers_
[
i
]
->
getOutputValue
()
->
zeroMem
();
}
}
}
...
...
@@ -300,16 +304,24 @@ void MKLDNNTester::runOnce() {
checkForward
();
// test backward
// simple updater
UpdateCallback
updateCallback
=
[](
Parameter
*
para
)
{
auto
&
grad
=
para
->
getBuf
(
PARAMETER_GRADIENT
);
auto
&
value
=
para
->
getBuf
(
PARAMETER_VALUE
);
real
lr
=
1e-3
;
value
->
add
(
*
grad
,
lr
);
};
randomTopDiffs
();
dnnLayer_
->
backward
(
nullptr
);
refLayer_
->
backward
(
nullptr
);
dnnLayer_
->
backward
(
updateCallback
);
refLayer_
->
backward
(
updateCallback
);
checkBackwardData
();
checkBackwardWgts
();
// clear buffers
// ref code will add to the diff, dnn code will write to it
// and clearTopDatas(
) and clearWgtDiffs() should be covered by test
layers
// and clearTopDatas(
REF) should be covered by ref
layers
clearBotDiffs
(
REF
);
clearWgtDiffs
(
REF
);
}
void
MKLDNNTester
::
run
(
const
TestConfig
&
dnn
,
...
...
paddle/gserver/tests/MKLDNNTester.h
浏览文件 @
171fee2c
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/gserver/layers/MKLDNNBase.h"
#include "paddle/gserver/layers/MKLDNNLayer.h"
namespace
paddle
{
...
...
@@ -40,7 +41,8 @@ protected:
vector
<
LayerMap
>
layerMaps_
;
vector
<
vector
<
ParameterPtr
>>
parameters_
;
vector
<
LayerPtr
>
testLayers_
;
LayerPtr
dnnLayer_
,
refLayer_
;
LayerPtr
refLayer_
;
MKLDNNLayerPtr
dnnLayer_
;
/// run some iterations, all the result should pass
size_t
iter_
;
...
...
@@ -88,10 +90,10 @@ private:
void
checkBackwardData
();
void
checkBackwardWgts
();
void
clearWgtDiffs
();
void
clear
BotDiffs
(
);
void
clearBotDiffs
(
int
n
);
// clear specific layer
void
clearTopDatas
();
// clear specific layer, clear all when id equals NUM
void
clear
WgtDiffs
(
size_t
id
=
NUM
);
void
clearBotDiffs
(
size_t
id
=
NUM
);
void
clearTopDatas
(
size_t
id
=
NUM
);
void
printTopDatas
();
void
printMatrix
(
const
MatrixPtr
&
m
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录