Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
bfbd066f
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bfbd066f
编写于
8月 29, 2017
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine
上级
fe51f726
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
118 addition
and
103 deletion
+118
-103
paddle/gserver/layers/MKLDNNFcLayer.cpp
paddle/gserver/layers/MKLDNNFcLayer.cpp
+59
-58
paddle/gserver/layers/MKLDNNFcLayer.h
paddle/gserver/layers/MKLDNNFcLayer.h
+2
-0
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+33
-15
paddle/math/MKLDNNMatrix.cpp
paddle/math/MKLDNNMatrix.cpp
+8
-17
paddle/math/MKLDNNMatrix.h
paddle/math/MKLDNNMatrix.h
+16
-13
未找到文件。
paddle/gserver/layers/MKLDNNFcLayer.cpp
浏览文件 @
bfbd066f
...
...
@@ -77,6 +77,24 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
wgtVal_
->
reorderDataTo
(
wgtVal_
,
dstFmt
,
targetDim
);
}
void
MKLDNNFcLayer
::
convertOutputToOtherDevice
()
{
copyOutputInfoToOtherDevice
();
// find other cpu device and reorder output to cpu device
int
cnt
=
0
;
for
(
size_t
i
=
0
;
i
<
outputOtherDevice_
.
size
();
i
++
)
{
if
(
outputOtherDevice_
[
i
].
deviceId
==
CPU_DEVICE
)
{
// fc cpu output value do not need convert
// just share point
outputOtherDevice_
[
i
].
value
=
output_
.
value
;
++
cnt
;
}
}
if
(
cnt
>
1
)
{
LOG
(
WARNING
)
<<
"should not have more than one CPU devie"
;
}
}
void
MKLDNNFcLayer
::
reshape
()
{
const
Argument
&
input
=
getInput
(
0
,
getPrev
(
0
)
->
getDeviceId
());
int
batchSize
=
input
.
getBatchSize
();
...
...
@@ -116,7 +134,7 @@ void MKLDNNFcLayer::resetFwd() {
const
MatrixPtr
&
bias
=
hasBias
?
biases_
->
getW
()
:
nullptr
;
const
MatrixPtr
&
out
=
output_
.
value
;
if
(
prevIsMKLDNN
())
{
if
(
prevIs
Only
MKLDNN
())
{
const
MatrixPtr
&
in
=
getInputValue
(
0
);
inVal_
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
in
);
CHECK
(
inVal_
)
<<
"Input should be MKLDNNMatrix"
;
...
...
@@ -136,30 +154,21 @@ void MKLDNNFcLayer::resetFwd() {
// change original output value to mkldnn output value
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
outVal_
);
if
(
!
nextIsMKLDNN
())
{
Argument
cpuOutput
;
for
(
size_t
i
=
0
;
i
<
outputOtherDevice_
.
size
();
i
++
)
{
if
(
outputOtherDevice_
[
i
].
deviceId
==
CPU_DEVICE
)
{
cpuOutput
=
outputOtherDevice_
[
i
];
}
}
cpuOutput
.
setFrameHeight
(
output_
.
getFrameHeight
());
cpuOutput
.
setFrameWidth
(
output_
.
getFrameWidth
());
// fc cpu output value do not need convert
cpuOutput
.
value
=
output_
.
value
;
if
(
!
nextIsOnlyMKLDNN
())
{
convertOutputToOtherDevice
();
}
// create forward handle
prop_kind
pk
=
prop_kind
::
forward
;
fc_fwd
::
desc
fwdDesc
=
hasBias
?
fc_fwd
::
desc
(
pk
,
inVal_
->
getMD
(),
wgtVal_
->
getMD
(),
biasVal_
->
getMD
(),
outVal_
->
getMD
())
:
fc_fwd
::
desc
(
pk
,
inVal_
->
getMD
(),
wgtVal_
->
getMD
(),
outVal_
->
getMD
());
fc_fwd
::
desc
fwdDesc
=
hasBias
?
fc_fwd
::
desc
(
pk
,
inVal_
->
getMemoryDesc
(),
wgtVal_
->
getMemoryDesc
(),
biasVal_
->
getMemoryDesc
(),
outVal_
->
getMemoryDesc
())
:
fc_fwd
::
desc
(
pk
,
inVal_
->
getMemoryDesc
(),
wgtVal_
->
getMemoryDesc
(),
outVal_
->
getMemoryDesc
());
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
if
(
hasBias
)
{
fwd_
.
reset
(
new
fc_fwd
(
fwdPD
,
*
inVal_
,
*
wgtVal_
,
*
biasVal_
,
*
outVal_
));
...
...
@@ -184,36 +193,38 @@ void MKLDNNFcLayer::resetBwd() {
const
MatrixPtr
&
wgt
=
weight_
->
getWGrad
();
const
MatrixPtr
&
bias
=
hasBias
?
biases_
->
getWGrad
()
:
nullptr
;
// TODO(TJ): merge
topdiffs
if
(
nextIsMKLDNN
())
{
// TODO(TJ): merge
outgrad
if
(
nextIs
Only
MKLDNN
())
{
// can not directly cast outputgrad to mkldnnmatrix,
// since each layer can not write the inputgrad to mkldnn inputgrad.
// So just create from matrix with outputvalue format.
const
MatrixPtr
&
out
=
getOutput
(
MKLDNN_DEVICE
).
grad
;
outGrad_
=
MKLDNNMatrix
::
create
(
out
,
outVal_
->
getP
D
());
outGrad_
=
MKLDNNMatrix
::
create
(
out
,
outVal_
->
getP
rimitiveDesc
());
}
else
{
const
MatrixPtr
&
out
=
getOutput
(
CPU_DEVICE
).
grad
;
// fc do not need to convert from cpu device since output always nc
// only need create from cpu device
outGrad_
=
MKLDNNMatrix
::
create
(
out
,
outVal_
->
getP
D
());
outGrad_
=
MKLDNNMatrix
::
create
(
out
,
outVal_
->
getP
rimitiveDesc
());
}
wgtGrad_
=
MKLDNNMatrix
::
create
(
wgt
,
wgtVal_
->
getPD
());
biasGrad_
=
hasBias
?
MKLDNNMatrix
::
create
(
bias
,
biasVal_
->
getPD
())
:
nullptr
;
wgtGrad_
=
MKLDNNMatrix
::
create
(
wgt
,
wgtVal_
->
getPrimitiveDesc
());
biasGrad_
=
hasBias
?
MKLDNNMatrix
::
create
(
bias
,
biasVal_
->
getPrimitiveDesc
())
:
nullptr
;
// create memory primitive desc
fc_fwd
::
desc
fwdDesc
=
fc_fwd
::
desc
(
prop_kind
::
forward
,
inVal_
->
getM
D
(),
wgtGrad_
->
getM
D
(),
outGrad_
->
getM
D
());
inVal_
->
getM
emoryDesc
(),
wgtGrad_
->
getM
emoryDesc
(),
outGrad_
->
getM
emoryDesc
());
fc_fwd
::
primitive_desc
fwdPD
=
fc_fwd
::
primitive_desc
(
fwdDesc
,
engine_
);
fc_bwdWgt
::
desc
bwdWgtDesc
=
hasBias
?
fc_bwdWgt
::
desc
(
inVal_
->
getMD
(),
wgtGrad_
->
getMD
(),
biasGrad_
->
getMD
(),
outGrad_
->
getMD
())
:
fc_bwdWgt
::
desc
(
inVal_
->
getMD
(),
wgtGrad_
->
getMD
(),
outGrad_
->
getMD
());
fc_bwdWgt
::
desc
bwdWgtDesc
=
hasBias
?
fc_bwdWgt
::
desc
(
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
biasGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
())
:
fc_bwdWgt
::
desc
(
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
());
fc_bwdWgt
::
primitive_desc
bwdWgtPD
=
fc_bwdWgt
::
primitive_desc
(
bwdWgtDesc
,
engine_
,
fwdPD
);
...
...
@@ -227,30 +238,20 @@ void MKLDNNFcLayer::resetBwd() {
pipelineBwd_
.
push_back
(
*
bwdWgt_
);
/// backward data
i
f
(
prevIsMKLDNN
())
{
const
MatrixPtr
&
in
=
getInputGrad
(
0
,
MKLDNN_DEVICE
);
i
nt
device
=
prevIsOnlyMKLDNN
()
?
MKLDNN_DEVICE
:
CPU_DEVICE
;
const
MatrixPtr
&
in
=
getInputGrad
(
0
,
device
);
if
(
in
==
nullptr
)
{
return
;
}
if
(
getInput
(
0
,
MKLDNN_DEVICE
).
getAllCount
()
>
1
)
{
// TODO(TJ): use outputMaps_ ways when merge topdiff done
}
else
{
inGrad_
=
MKLDNNMatrix
::
create
(
in
,
inVal_
->
getPD
());
}
if
(
getInput
(
0
,
device
).
getAllCount
()
>
1
)
{
// TODO(TJ): use outputMaps_ ways when merge outgrad done
}
else
{
const
MatrixPtr
&
in
=
getInputGrad
(
0
,
CPU_DEVICE
);
if
(
in
==
nullptr
)
{
return
;
}
if
(
getInput
(
0
,
CPU_DEVICE
).
getAllCount
()
>
1
)
{
// TODO(TJ): use outputMaps_ ways when merge topdiff done
}
else
{
inGrad_
=
MKLDNNMatrix
::
create
(
in
,
inVal_
->
getPD
());
}
inGrad_
=
MKLDNNMatrix
::
create
(
in
,
inVal_
->
getPrimitiveDesc
());
}
fc_bwdData
::
desc
bwdDataDesc
=
fc_bwdData
::
desc
(
inVal_
->
getMD
(),
wgtGrad_
->
getMD
(),
outGrad_
->
getMD
());
fc_bwdData
::
desc
bwdDataDesc
=
fc_bwdData
::
desc
(
inVal_
->
getMemoryDesc
(),
wgtGrad_
->
getMemoryDesc
(),
outGrad_
->
getMemoryDesc
());
fc_bwdData
::
primitive_desc
bwdDataPD
=
fc_bwdData
::
primitive_desc
(
bwdDataDesc
,
engine_
,
fwdPD
);
...
...
paddle/gserver/layers/MKLDNNFcLayer.h
浏览文件 @
bfbd066f
...
...
@@ -72,6 +72,8 @@ protected:
* only would be called when needed
*/
void
resetBwd
();
void
convertOutputToOtherDevice
()
override
;
};
}
// namespace paddle
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
bfbd066f
...
...
@@ -86,10 +86,7 @@ public:
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
<<
"Please set WITH_MKLDNN=ON "
<<
"and set use_mkldnn=True"
;
if
(
useGpu_
==
true
)
{
LOG
(
WARNING
)
<<
"Do not support GPU yet, will change to useGpu = false"
;
useGpu_
=
false
;
}
CHECK
(
!
useGpu_
)
<<
"Do not support GPU yet"
;
// set device id before Layer::init
setDevice
(
MKLDNN_DEVICE
);
...
...
@@ -116,6 +113,12 @@ public:
*/
virtual
void
convertWeightsToPaddle
()
{}
/**
* convert MKLDNN output to other device.
* only support CPU device yet
*/
virtual
void
convertOutputToOtherDevice
()
{}
/**
* print info about sizes
*/
...
...
@@ -147,22 +150,25 @@ public:
protected:
/**
* If next layer only has MKLDNN type.
* Otherwise, only support otherdevice CPU device.
* copy image size and sequence info to other device
*/
bool
nextIsMKLDNN
()
{
void
copyOutputInfoToOtherDevice
()
{
for
(
size_t
i
=
0
;
i
<
outputOtherDevice_
.
size
();
i
++
)
{
CHECK_EQ
(
outputOtherDevice_
[
i
].
deviceId
,
CPU_DEVICE
)
<<
"Only support other device is CPU yet"
;
outputOtherDevice_
[
i
].
setFrameHeight
(
output_
.
getFrameHeight
());
outputOtherDevice_
[
i
].
setFrameWidth
(
output_
.
getFrameWidth
());
outputOtherDevice_
[
i
].
sequenceStartPositions
=
output_
.
sequenceStartPositions
;
outputOtherDevice_
[
i
].
subSequenceStartPositions
=
output_
.
subSequenceStartPositions
;
outputOtherDevice_
[
i
].
cpuSequenceDims
=
output_
.
cpuSequenceDims
;
}
return
outputOtherDevice_
.
size
()
==
0
;
}
/**
* Is previous layer MKLDNN type.
* Otherwise, only support
otherdevice
CPU device.
* Is previous layer
only has
MKLDNN type.
* Otherwise, only support
the previous layer using
CPU device.
*/
bool
prevIsMKLDNN
(
int
index
=
0
)
{
bool
prevIs
Only
MKLDNN
(
int
index
=
0
)
{
int
prevDevice
=
getPrev
(
index
)
->
getDeviceId
();
if
(
prevDevice
==
MKLDNN_DEVICE
)
{
return
true
;
...
...
@@ -173,11 +179,23 @@ protected:
}
}
/**
* If output only has MKLDNN device.
* Otherwise, other devices should only using CPU device.
*/
bool
nextIsOnlyMKLDNN
()
{
for
(
size_t
i
=
0
;
i
<
outputOtherDevice_
.
size
();
i
++
)
{
CHECK_EQ
(
outputOtherDevice_
[
i
].
deviceId
,
CPU_DEVICE
)
<<
"Only support other device is CPU yet"
;
}
return
outputOtherDevice_
.
size
()
==
0
;
}
/**
* Sync input value data
*/
void
syncInputValue
()
{
if
(
prevIsMKLDNN
())
{
if
(
prevIs
Only
MKLDNN
())
{
return
;
}
real
*
iData
=
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
();
...
...
@@ -190,7 +208,7 @@ protected:
* Sync output grad data
*/
void
syncOutputGrad
()
{
if
(
nextIsMKLDNN
())
{
if
(
nextIs
Only
MKLDNN
())
{
return
;
}
...
...
paddle/math/MKLDNNMatrix.cpp
浏览文件 @
bfbd066f
...
...
@@ -31,7 +31,6 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
if
(
m
==
nullptr
)
{
size_t
height
=
dims
[
0
];
size_t
width
=
cnts
/
dims
[
0
];
// LOG(INFO) << height << "," << width;
m
=
Matrix
::
create
(
height
,
width
,
false
,
false
);
}
...
...
@@ -40,10 +39,8 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
CHECK
(
cpuMatrix
)
<<
"Only support create from CPU matrix yet"
;
CHECK_EQ
(
cnts
,
m
->
getElementCnt
())
<<
"Count size does not match"
;
size_t
width
=
m
->
getWidth
();
size_t
height
=
m
->
getHeight
();
real
*
data
=
m
->
getData
();
return
std
::
make_shared
<
MKLDNNMatrix
>
(
data
,
height
,
width
,
pd
);
return
std
::
make_shared
<
MKLDNNMatrix
>
(
m
->
getData
(),
m
->
getHeight
(),
m
->
getWidth
(),
pd
);
}
MKLDNNMatrixPtr
MKLDNNMatrix
::
create
(
MatrixPtr
m
,
...
...
@@ -51,9 +48,7 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
memory
::
format
fmt
,
engine
&
eg
,
mkldnn
::
memory
::
data_type
dtype
)
{
memory
::
desc
md
=
memory
::
desc
(
dims
,
dtype
,
fmt
);
memory
::
primitive_desc
pd
=
memory
::
primitive_desc
(
md
,
eg
);
return
create
(
m
,
pd
);
return
create
(
m
,
memory
::
primitive_desc
(
memory
::
desc
(
dims
,
dtype
,
fmt
),
eg
));
}
void
MKLDNNMatrix
::
reorderDataFrom
(
const
MKLDNNMatrixPtr
&
m
,
...
...
@@ -64,9 +59,7 @@ void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m,
return
;
}
CHECK_EQ
(
getElementCnt
(),
m
->
getElementCnt
())
<<
"size should equal"
;
real
*
srcData
=
getData
();
real
*
dstData
=
m
->
getData
();
reorderOnce
(
srcData
,
dstData
,
srcFmt
,
dstFmt
,
targetDim
);
reorderOnce
(
getData
(),
m
->
getData
(),
srcFmt
,
dstFmt
,
targetDim
);
}
void
MKLDNNMatrix
::
reorderDataTo
(
const
MKLDNNMatrixPtr
&
m
,
...
...
@@ -77,9 +70,7 @@ void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m,
return
;
}
CHECK_EQ
(
getElementCnt
(),
m
->
getElementCnt
())
<<
"size should equal"
;
real
*
srcData
=
getData
();
real
*
dstData
=
m
->
getData
();
reorderOnce
(
srcData
,
dstData
,
srcFmt
,
dstFmt
,
targetDim
);
reorderOnce
(
getData
(),
m
->
getData
(),
srcFmt
,
dstFmt
,
targetDim
);
}
void
MKLDNNMatrix
::
reorderOnce
(
void
*
srcData
,
...
...
@@ -120,8 +111,9 @@ void MKLDNNMatrix::downSpatial() {
return
;
}
memory
::
dims
srcDims
=
getDims
();
// TODO(TJ): change H(height) and W(width) if support nhwc or more
const
int
H
=
2
,
W
=
3
;
memory
::
dims
srcDims
=
getDims
();
if
(
srcDims
[
H
]
!=
1
||
srcDims
[
W
]
!=
1
)
{
// can not down spatial
return
;
...
...
@@ -141,13 +133,12 @@ void MKLDNNMatrix::downSpatial() {
}
memory
::
desc
md
=
memory
::
desc
(
dstDims
,
getDtype
(),
dstFmt
);
memory
::
primitive_desc
pd
=
memory
::
primitive_desc
(
md
,
getEngine
());
void
*
data
=
getData
();
mkldnn_primitive_t
result
;
mkldnn
::
error
::
wrap_c_api
(
mkldnn_primitive_create
(
&
result
,
pd
.
get
(),
nullptr
,
nullptr
),
"could not create a memory primitive"
);
reset
(
result
);
set_data_handle
(
data
);
set_data_handle
(
getData
()
);
}
}
// namespace paddle
paddle/math/MKLDNNMatrix.h
浏览文件 @
bfbd066f
...
...
@@ -56,8 +56,8 @@ public:
public:
/**
* Reorder this MKLDNNMatrix from other format.
* Support inplace reorder
*
Pay attention
: this function would only reorder the data layout.
* Support inplace reorder
.
*
@note
: this function would only reorder the data layout.
* will NOT change this original dim or format info
*/
void
reorderDataFrom
(
const
MKLDNNMatrixPtr
&
m
,
...
...
@@ -66,8 +66,8 @@ public:
/**
* Reorder this MKLDNNMatrix to other format.
* Support inplace reorder
*
Pay attention
: this function would only reorder the data layout.
* Support inplace reorder
.
*
@note
: this function would only reorder the data layout.
* will NOT change the dst dim or format info
*/
void
reorderDataTo
(
const
MKLDNNMatrixPtr
&
m
,
...
...
@@ -90,18 +90,20 @@ public:
/**
* Get primitive descriptor.
*/
mkldnn
::
memory
::
primitive_desc
getPD
()
{
return
this
->
get_primitive_desc
();
}
mkldnn
::
memory
::
primitive_desc
getPrimitiveDesc
()
{
return
this
->
get_primitive_desc
();
}
/**
* Get memory descriptor.
*/
mkldnn
::
memory
::
desc
getM
D
()
{
return
getPD
().
desc
();
}
mkldnn
::
memory
::
desc
getM
emoryDesc
()
{
return
getPrimitiveDesc
().
desc
();
}
/**
* Get dimensions.
*/
mkldnn
::
memory
::
dims
getDims
()
{
mkldnn
::
memory
::
desc
md
=
getM
D
();
mkldnn
::
memory
::
desc
md
=
getM
emoryDesc
();
const
int
*
src
=
md
.
data
.
dims
;
int
ndims
=
md
.
data
.
ndims
;
mkldnn
::
memory
::
dims
dst
;
...
...
@@ -116,24 +118,25 @@ public:
* Get format.
*/
mkldnn
::
memory
::
format
getFormat
()
{
return
(
mkldnn
::
memory
::
format
)(
getM
D
().
data
.
format
);
return
(
mkldnn
::
memory
::
format
)(
getM
emoryDesc
().
data
.
format
);
}
/**
* Get memory data type.
*/
mkldnn
::
memory
::
data_type
getDtype
()
{
return
(
mkldnn
::
memory
::
data_type
)(
getM
D
().
data
.
data_type
);
return
(
mkldnn
::
memory
::
data_type
)(
getM
emoryDesc
().
data
.
data_type
);
}
/**
* Get engine.
*/
mkldnn
::
engine
getEngine
()
{
return
getP
D
().
get_engine
();
}
mkldnn
::
engine
getEngine
()
{
return
getP
rimitiveDesc
().
get_engine
();
}
protected:
/**
* Do once reorder supported inplace.
* Do reorder once.
* Can support inplace.
*/
void
reorderOnce
(
void
*
srcData
,
void
*
dstData
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录