Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
ed0a564c
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ed0a564c
编写于
1月 11, 2018
作者:
H
hedaoyuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Optimize GemmConvMobileFunction.
上级
2b202f75
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
88 addition
and
28 deletion
+88
-28
paddle/function/GemmConvOp.cpp
paddle/function/GemmConvOp.cpp
+35
-28
paddle/function/Im2Col.h
paddle/function/Im2Col.h
+53
-0
未找到文件。
paddle/function/GemmConvOp.cpp
浏览文件 @
ed0a564c
...
@@ -178,19 +178,22 @@ public:
...
@@ -178,19 +178,22 @@ public:
real
*
inputData
=
inputs
[
0
].
data
<
real
>
();
real
*
inputData
=
inputs
[
0
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
outputData
=
outputs
[
0
].
data
<
real
>
();
real
*
outputData
=
outputs
[
0
].
data
<
real
>
();
real
*
colData
=
NULL
;
bool
needIm2col
=
isNeedIm2col
(
filter
);
bool
needIm2col
=
isNeedIm2col
(
filter
);
TensorShape
imShape
=
TensorShape
imShape
=
TensorShape
({
inputChannels
/
groups_
,
inputHeight
,
inputWidth
});
TensorShape
({
inputChannels
/
groups_
,
inputHeight
,
inputWidth
});
TensorShape
colShape
;
TensorShape
colShape
;
real
*
colData
=
NULL
;
size_t
colHeight
=
inputChannels
/
groups_
*
filterHeight
*
filterWidth
;
// Max col matrix width 4096, Max col matrix size 4M.
size_t
colWidth
=
outputHeight
*
outputWidth
;
size_t
outputHeightSteps
=
// Max col matrix height 256, Max col matrix width 1024
std
::
min
(
std
::
max
(
4096
/
outputWidth
,
(
size_t
)
1
),
outputHeight
);
size_t
stepColHeight
=
std
::
min
(
colHeight
,
static_cast
<
size_t
>
(
256
));
size_t
maxColWidth
=
outputHeightSteps
*
outputWidth
;
size_t
stepColWidth
=
std
::
min
(
colWidth
,
static_cast
<
size_t
>
(
2048
));
size_t
channelSteps
=
std
::
min
(
std
::
max
((
1048576
/
maxColWidth
)
/
filterHeight
*
filterWidth
,
(
size_t
)
1
),
inputChannels
/
groups_
);
size_t
maxColHeight
=
channelSteps
*
filterHeight
*
filterWidth
;
if
(
needIm2col
)
{
if
(
needIm2col
)
{
colShape
=
TensorShape
({
inputChannels
/
groups_
,
colShape
=
TensorShape
({
inputChannels
/
groups_
,
...
@@ -199,7 +202,7 @@ public:
...
@@ -199,7 +202,7 @@ public:
outputHeight
,
outputHeight
,
outputWidth
});
outputWidth
});
resizeBuffer
<
Device
>
(
stepColHeight
*
step
ColWidth
*
sizeof
(
real
));
resizeBuffer
<
Device
>
(
maxColHeight
*
max
ColWidth
*
sizeof
(
real
));
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
}
}
...
@@ -209,20 +212,24 @@ public:
...
@@ -209,20 +212,24 @@ public:
(
outputChannels
/
groups_
)
*
outputHeight
*
outputWidth
;
(
outputChannels
/
groups_
)
*
outputHeight
*
outputWidth
;
size_t
filterOffset
=
filter
.
getElements
()
/
groups_
;
size_t
filterOffset
=
filter
.
getElements
()
/
groups_
;
int
nStride
=
col
Width
;
int
nStride
=
outputHeight
*
output
Width
;
int
kStride
=
colHeight
;
int
kStride
=
inputChannels
/
groups_
*
filterHeight
*
filterWidth
;
for
(
size_t
i
=
0
;
i
<
batchSize
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
batchSize
;
i
++
)
{
filterData
=
inputs
[
1
].
data
<
real
>
();
for
(
size_t
g
=
0
;
g
<
groups_
;
g
++
)
{
for
(
size_t
g
=
0
;
g
<
groups_
;
g
++
)
{
if
(
needIm2col
)
{
if
(
needIm2col
)
{
real
beta_
=
beta
;
real
beta_
=
beta
;
for
(
size_t
colHeightStart
=
0
;
colHeightStart
<
colHeight
;
for
(
size_t
ic
=
0
;
ic
<
inputChannels
/
groups_
;
colHeightStart
+=
stepColHeight
)
{
ic
+=
channelSteps
)
{
for
(
size_t
colWidthStart
=
0
;
colWidthStart
<
colWidth
;
int
channels
=
std
::
min
(
inputChannels
/
groups_
-
ic
,
channelSteps
);
colWidthStart
+=
stepColWidth
)
{
for
(
size_t
oh
=
0
;
oh
<
outputHeight
;
oh
+=
outputHeightSteps
)
{
int
N
=
std
::
min
(
colWidth
-
colWidthStart
,
stepColWidth
);
int
height
=
std
::
min
(
outputHeight
-
oh
,
outputHeightSteps
);
int
K
=
std
::
min
(
colHeight
-
colHeightStart
,
stepColHeight
);
int
M
=
outputChannels
/
groups_
;
int
N
=
height
*
outputWidth
;
int
K
=
channels
*
filterHeight
*
filterWidth
;
// im2col
// im2col
im2col
(
inputData
+
g
*
inputOffset
,
im2col
(
inputData
,
imShape
,
imShape
,
colData
,
colData
,
colShape
,
colShape
,
...
@@ -232,13 +239,12 @@ public:
...
@@ -232,13 +239,12 @@ public:
paddingW
(),
paddingW
(),
dilationH
(),
dilationH
(),
dilationW
(),
dilationW
(),
c
olHeightStart
,
c
hannels
,
K
,
oh
,
colWidthStar
t
,
heigh
t
,
N
);
N
);
// gemm
// gemm
int
M
=
outputChannels
/
groups_
;
BlasGemm
<
Device
,
real
>::
compute
(
BlasGemm
<
Device
,
real
>::
compute
(
false
,
false
,
false
,
false
,
...
@@ -246,12 +252,12 @@ public:
...
@@ -246,12 +252,12 @@ public:
N
,
N
,
K
,
K
,
1.0
f
,
1.0
f
,
filterData
+
g
*
filterOffset
+
colHeightStart
,
filterData
+
ic
*
filterHeight
*
filterWidth
,
kStride
,
kStride
,
colData
,
colData
,
N
,
N
,
beta_
,
beta_
,
outputData
+
g
*
outputOffset
+
colWidthStart
,
outputData
+
oh
*
outputWidth
,
nStride
);
nStride
);
}
}
beta_
=
1.0
;
beta_
=
1.0
;
...
@@ -266,17 +272,18 @@ public:
...
@@ -266,17 +272,18 @@ public:
N
,
N
,
K
,
K
,
1.0
f
,
1.0
f
,
filterData
+
g
*
filterOffset
,
filterData
,
K
,
K
,
inputData
+
g
*
inputOffset
,
inputData
,
N
,
N
,
beta
,
beta
,
outputData
+
g
*
outputOffset
,
outputData
,
N
);
N
);
}
}
inputData
+=
inputOffset
;
outputData
+=
outputOffset
;
filterData
+=
filterOffset
;
}
}
inputData
+=
inputChannels
*
inputHeight
*
inputWidth
;
outputData
+=
outputChannels
*
outputHeight
*
outputWidth
;
}
}
memory_
.
reset
();
memory_
.
reset
();
...
...
paddle/function/Im2Col.h
浏览文件 @
ed0a564c
...
@@ -98,6 +98,7 @@ public:
...
@@ -98,6 +98,7 @@ public:
int
dilationWidth
=
1
);
int
dilationWidth
=
1
);
};
};
#if 0
template <class T>
template <class T>
class Im2ColMobileFunctor {
class Im2ColMobileFunctor {
public:
public:
...
@@ -147,5 +148,57 @@ public:
...
@@ -147,5 +148,57 @@ public:
}
}
}
}
};
};
#endif
/**
 * \brief Partial im2col: expands a horizontal band of output rows for a
 *        range of input channels into a column buffer.
 *
 * Layout written by this functor (derived from the pointer arithmetic below):
 *   - The column buffer is treated as a row-major matrix whose rows have
 *     `colWidth` elements.
 *   - Matrix row index is `(ic * filterHeight + fh) * filterWidth + fw`.
 *   - Inside a matrix row, local output row `oh` occupies the element range
 *     `[oh * outputWidth, (oh + 1) * outputWidth)`.
 *
 * Positions that fall outside the image (because of padding) are written as
 * zero.
 *
 * \param imData          Pointer to the first input channel to expand; each
 *                        channel holds `imShape[1] * imShape[2]` elements.
 * \param imShape         Index 1 is read as the input height and index 2 as
 *                        the input width.
 * \param colData         Destination column buffer.
 * \param colShape        Index 1 is read as the filter height, index 2 as the
 *                        filter width and index 4 as the output width.
 * \param strideHeight    Vertical stride.
 * \param strideWidth     Horizontal stride.
 * \param paddingHeight   Vertical padding.
 * \param paddingWidth    Horizontal padding.
 * \param dilationHeight  Vertical spacing between filter taps.
 * \param dilationWidth   Horizontal spacing between filter taps.
 * \param inputChannels   Number of consecutive channels to expand, starting
 *                        at `imData`.
 * \param colOffset       Index of the first output row of the band.
 * \param colOutputHeight Number of output rows in the band.
 * \param colWidth        Row stride (in elements) of the column buffer.
 *
 * NOTE(review): the zero fill for out-of-range rows uses memset, which
 * presumes an all-zero byte pattern represents T(0) — confirm for every T
 * this template is instantiated with.
 */
template <class T>
class Im2ColMobileFunctor {
public:
  void operator()(const T* imData,
                  const TensorShape& imShape,
                  T* colData,
                  const TensorShape& colShape,
                  int strideHeight,
                  int strideWidth,
                  int paddingHeight,
                  int paddingWidth,
                  int dilationHeight,
                  int dilationWidth,
                  int inputChannels,
                  int colOffset,
                  int colOutputHeight,
                  int colWidth) {
    int inputHeight = imShape[1];
    int inputWidth = imShape[2];
    int filterHeight = colShape[1];
    int filterWidth = colShape[2];
    int outputWidth = colShape[4];

    for (int ic = 0; ic < inputChannels; ic++) {
      for (int oh = 0; oh < colOutputHeight; oh++) {
        // Start of this output row inside the first column-matrix row of
        // the current channel.
        T* dstData = colData + oh * outputWidth;
        for (int fh = 0; fh < filterHeight; fh++) {
          // The source image row depends only on (oh, fh), so compute it once
          // per filter row. The filter tap offset is scaled by the dilation;
          // previously the dilation arguments were ignored, which produced
          // wrong columns whenever dilation != 1.
          const int imRowIdx = (oh + colOffset) * strideHeight +
                               fh * dilationHeight - paddingHeight;
          const bool rowInPadding = imRowIdx < 0 || imRowIdx >= inputHeight;
          for (int fw = 0; fw < filterWidth; fw++) {
            if (rowInPadding) {
              // Whole source row lies in the padding area.
              memset(dstData, 0, outputWidth * sizeof(T));
            } else {
              const T* srcRow = imData + imRowIdx * inputWidth;
              for (int ow = 0; ow < outputWidth; ow++) {
                const int imColIdx =
                    ow * strideWidth + fw * dilationWidth - paddingWidth;
                if (imColIdx < 0 || imColIdx >= inputWidth) {
                  dstData[ow] = T(0);
                } else {
                  dstData[ow] = srcRow[imColIdx];
                }
              }
            }
            // Advance to the next (fh, fw) row of the column matrix.
            dstData += colWidth;
          }
        }
      }
      // Move on to the next channel in both the column buffer and the image.
      colData += filterHeight * filterWidth * colWidth;
      imData += inputHeight * inputWidth;
    }
  }
};
}
// namespace paddle
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录