Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
a096c58e
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a096c58e
编写于
12月 29, 2017
作者:
H
hedaoyuan
提交者:
GitHub
12月 29, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #7034 from hedaoyuan/convolution
GemmConvMobileFunction(optimized for mobile)
上级
d00e1ed5
b7c4b58d
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
290 additions
and
3 deletions
+290
-3
paddle/function/GemmConvOp.cpp
paddle/function/GemmConvOp.cpp
+158
-3
paddle/function/Im2Col.h
paddle/function/Im2Col.h
+50
-0
paddle/function/Im2ColTest.cpp
paddle/function/Im2ColTest.cpp
+82
-0
未找到文件。
paddle/function/GemmConvOp.cpp
浏览文件 @
a096c58e
...
...
@@ -126,14 +126,165 @@ public:
inputData
+=
inputChannels
*
inputHeight
*
inputWidth
;
outputData
+=
outputChannels
*
outputHeight
*
outputWidth
;
}
}
};
#ifdef PADDLE_MOBILE_INFERENCE
if
(
Device
==
DEVICE_TYPE_CPU
)
{
memory_
.
reset
();
/*
 * \brief Forward calculation of convolution, optimized for mobile.
 *
 * When im2col is required, the column matrix is not expanded as a whole.
 * calc() expands one tile of at most 256 x 2048 elements at a time into a
 * scratch buffer and multiplies that tile with the matching filter columns.
 */
template <DeviceType Device>
class GemmConvMobileFunction : public ConvFunctionBase {
public:
  // Forwards the configuration to ConvFunctionBase::init().
  void init(const FuncConfig& config) override {
    ConvFunctionBase::init(config);
  }

  // Passes the shapes of inputs[0] (input), inputs[1] (filter) and
  // outputs[0] (output) to checkShape().
  void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
    const TensorShape& input = inputs[0].shape();
    const TensorShape& filter = inputs[1].shape();
    const TensorShape& output = outputs[0].shape();
    checkShape(input, filter, output);
  }

  // Computes the forward convolution of inputs[0] with the filter inputs[1]
  // and writes (or, for ADD_TO outputs, accumulates) the result into
  // outputs[0]. Shapes are read as [batch, channels, height, width].
  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ(numInputs_, inputs.size());
    CHECK_EQ(numOutputs_, outputs.size());
    check(inputs, outputs);
    // TODO(hedaoyuan): Need to define some index macros,
    // to avoid using 0 and 1.
    const TensorShape& input = inputs[0].shape();
    const TensorShape& filter = inputs[1].shape();
    const TensorShape& output = outputs[0].shape();

    // beta == 1 makes GEMM add to the existing output, beta == 0 overwrites.
    const real beta = (outputs[0].getArgType() == ADD_TO) ? 1.0 : 0.0;

    size_t batchSize = input[0];
    size_t inputChannels = input[1];
    size_t inputHeight = input[2];
    size_t inputWidth = input[3];
    size_t filterHeight = getFilterHeight(filter);
    size_t filterWidth = getFilterWidth(filter);
    size_t outputChannels = output[1];
    size_t outputHeight = output[2];
    size_t outputWidth = output[3];

    real* inputData = inputs[0].data<real>();
    real* filterData = inputs[1].data<real>();
    real* outputData = outputs[0].data<real>();
    bool needIm2col = isNeedIm2col(filter);

    // Per-group image shape: [channels / groups, height, width].
    TensorShape imShape =
        TensorShape({inputChannels / groups_, inputHeight, inputWidth});

    TensorShape colShape;
    real* colData = nullptr;

    // Dimensions of the full (virtual) column matrix of one group.
    size_t colHeight = inputChannels / groups_ * filterHeight * filterWidth;
    size_t colWidth = outputHeight * outputWidth;

    // Upper bounds of one column-matrix tile; they also bound the scratch
    // buffer to kMaxColHeight * kMaxColWidth elements.
    const size_t kMaxColHeight = 256;
    const size_t kMaxColWidth = 2048;
    size_t stepColHeight = std::min(colHeight, kMaxColHeight);
    size_t stepColWidth = std::min(colWidth, kMaxColWidth);

    if (needIm2col) {
      colShape = TensorShape({inputChannels / groups_,
                              filterHeight,
                              filterWidth,
                              outputHeight,
                              outputWidth});

      // The buffer only has to hold a single tile.
      resizeBuffer<Device>(stepColHeight * stepColWidth * sizeof(real));
      colData = reinterpret_cast<real*>(memory_->getBuf());
    }

    Im2ColMobileFunctor<real> im2col;
    // Element counts separating consecutive groups in each tensor.
    size_t inputOffset = imShape.getElements();
    size_t outputOffset =
        (outputChannels / groups_) * outputHeight * outputWidth;
    size_t filterOffset = filter.getElements() / groups_;

    // Leading dimensions of the full output (nStride) and filter (kStride)
    // matrices; the tiled GEMM addresses sub-blocks inside them.
    const int nStride = static_cast<int>(colWidth);
    const int kStride = static_cast<int>(colHeight);
    // Output channels per group: the M dimension of every GEMM call below.
    const int M = static_cast<int>(outputChannels / groups_);

    for (size_t i = 0; i < batchSize; i++) {
      for (size_t g = 0; g < groups_; g++) {
        if (needIm2col) {
          // The first K tile uses the caller's beta. Later K tiles must add
          // their partial products to what the first one wrote.
          real tileBeta = beta;
          for (size_t colHeightStart = 0; colHeightStart < colHeight;
               colHeightStart += stepColHeight) {
            for (size_t colWidthStart = 0; colWidthStart < colWidth;
                 colWidthStart += stepColWidth) {
              // The last tile in each direction may be smaller than the step.
              const int N = static_cast<int>(
                  std::min(colWidth - colWidthStart, stepColWidth));
              const int K = static_cast<int>(
                  std::min(colHeight - colHeightStart, stepColHeight));
              // im2col: expand only the K x N tile into colData.
              im2col(inputData + g * inputOffset,
                     imShape,
                     colData,
                     colShape,
                     strideH(),
                     strideW(),
                     paddingH(),
                     paddingW(),
                     dilationH(),
                     dilationW(),
                     colHeightStart,
                     K,
                     colWidthStart,
                     N);

              // gemm: filter[:, colHeightStart : +K] * tile, written to
              // output[:, colWidthStart : +N].
              BlasGemm<Device, real>::compute(
                  false,
                  false,
                  M,
                  N,
                  K,
                  1.0f,
                  filterData + g * filterOffset + colHeightStart,
                  kStride,
                  colData,
                  N,
                  tileBeta,
                  outputData + g * outputOffset + colWidthStart,
                  nStride);
            }
            tileBeta = 1.0;
          }
        } else {
          // No im2col needed: the input is used directly as the right-hand
          // GEMM operand.
          const int N = static_cast<int>(outputHeight * outputWidth);
          const int K = static_cast<int>(inputChannels / groups_ *
                                         filterHeight * filterWidth);
          BlasGemm<Device, real>::compute(false,
                                          false,
                                          M,
                                          N,
                                          K,
                                          1.0f,
                                          filterData + g * filterOffset,
                                          K,
                                          inputData + g * inputOffset,
                                          N,
                                          beta,
                                          outputData + g * outputOffset,
                                          N);
        }
      }
      // Advance to the next sample of the batch.
      inputData += inputChannels * inputHeight * inputWidth;
      outputData += outputChannels * outputHeight * outputWidth;
    }

    // NOTE(review): presumably drops the scratch buffer so it is not kept
    // alive between calls -- confirm against the declaration of memory_.
    memory_.reset();
  }
};
#endif
/*
* \brief Backward input calculation of convolution.
*/
...
...
@@ -348,7 +499,11 @@ public:
}
};
// REGISTER_TYPED_FUNC is defined elsewhere; presumably it registers the
// given class under the given function name for the given device type.
// Forward convolution (GemmConv) on CPU uses the mobile-optimized class when
// PADDLE_MOBILE_INFERENCE is defined and GemmConvFunction otherwise.
#ifdef PADDLE_MOBILE_INFERENCE
REGISTER_TYPED_FUNC
(
GemmConv
,
CPU
,
GemmConvMobileFunction
);
#else
REGISTER_TYPED_FUNC
(
GemmConv
,
CPU
,
GemmConvFunction
);
#endif
// The two gradient functions are listed for CPU outside the guard above, so
// they are the same in both configurations.
REGISTER_TYPED_FUNC
(
GemmConvGradInput
,
CPU
,
GemmConvGradInputFunction
);
REGISTER_TYPED_FUNC
(
GemmConvGradFilter
,
CPU
,
GemmConvGradFilterFunction
);
#ifdef PADDLE_WITH_CUDA
...
...
paddle/function/Im2Col.h
浏览文件 @
a096c58e
...
...
@@ -98,4 +98,54 @@ public:
int
dilationWidth
=
1
);
};
// Expands one rectangular tile of the im2col column matrix.
//
// The full column matrix has one row per (channel, filter row, filter column)
// triple and one column per output pixel. This functor fills only rows
// [colHeightStart, colHeightStart + colHeightSize) and columns
// [colWidthStart, colWidthStart + colWidthSize), and stores them densely in
// colData as a colHeightSize x colWidthSize row-major block.
//
// imShape is read as [channels, height, width]. From colShape only the
// filter height (index 1), filter width (index 2) and output width (index 4)
// are used. Positions that fall into the padding area are written as zero.
template <class T>
class Im2ColMobileFunctor {
public:
  void operator()(const T* imData,
                  const TensorShape& imShape,
                  T* colData,
                  const TensorShape& colShape,
                  int strideHeight,
                  int strideWidth,
                  int paddingHeight,
                  int paddingWidth,
                  int dilationHeight,
                  int dilationWidth,
                  int colHeightStart,
                  int colHeightSize,
                  int colWidthStart,
                  int colWidthSize) {
    const int imHeight = imShape[1];
    const int imWidth = imShape[2];
    const int kernelHeight = colShape[1];
    const int kernelWidth = colShape[2];
    const int outWidth = colShape[4];

    for (int row = 0; row < colHeightSize; ++row) {
      // Decompose the global row index into (channel, kernel row, kernel col).
      const int globalRow = colHeightStart + row;
      const int kernelCol = globalRow % kernelWidth;
      const int kernelRow = (globalRow / kernelWidth) % kernelHeight;
      const int channel = globalRow / kernelWidth / kernelHeight;

      for (int col = 0; col < colWidthSize; ++col) {
        // Decompose the global column index into the output pixel position.
        const int globalCol = colWidthStart + col;
        const int outRow = globalCol / outWidth;
        const int outCol = globalCol % outWidth;

        // Source pixel in the unpadded image; may lie outside of it.
        const int srcRow =
            outRow * strideHeight + kernelRow * dilationHeight - paddingHeight;
        const int srcCol =
            outCol * strideWidth + kernelCol * dilationWidth - paddingWidth;

        const bool insideImage = srcRow >= 0 && srcRow < imHeight &&
                                 srcCol >= 0 && srcCol < imWidth;
        const int dstIndex = row * colWidthSize + col;
        colData[dstIndex] =
            insideImage
                ? imData[(channel * imHeight + srcRow) * imWidth + srcCol]
                : static_cast<T>(0);
      }
    }
  }
};
}
// namespace paddle
paddle/function/Im2ColTest.cpp
浏览文件 @
a096c58e
...
...
@@ -138,4 +138,86 @@ TEST(Im2ColFunctor, GPU) { TestIm2ColFunctor<DEVICE_TYPE_GPU, float>(); }
#endif
// Checks that Im2ColMobileFunctor, asked for the whole column matrix in one
// tile (start 0, full height and width), produces the same result as the
// kCFO CPU Im2ColFunctor over a sweep of image sizes and dilations.
template <class T>
void TestIm2ColMobileFunctor() {
  for (size_t channels : {32}) {
    for (size_t inputHeight : {33, 100}) {
      for (size_t inputWidth : {32, 96}) {
        for (size_t filterHeight : {5}) {
          for (size_t filterWidth : {7}) {
            for (size_t stride : {2}) {
              for (size_t padding : {1}) {
                for (size_t dilation : {1, 3}) {
                  // Effective filter extent once dilation is applied.
                  size_t filterSizeH = (filterHeight - 1) * dilation + 1;
                  size_t filterSizeW = (filterWidth - 1) * dilation + 1;
                  // Skip only this configuration. A `break` here would also
                  // drop every remaining dilation value, including ones
                  // that pass the padding check below.
                  if (inputHeight + 2 * padding < filterSizeH ||
                      inputWidth + 2 * padding < filterSizeW)
                    continue;
                  if (padding >= filterSizeH || padding >= filterSizeW)
                    continue;
                  size_t outputHeight =
                      (inputHeight - filterSizeH + 2 * padding) / stride + 1;
                  size_t outputWidth =
                      (inputWidth - filterSizeW + 2 * padding) / stride + 1;

                  TensorShape imShape =
                      TensorShape({channels, inputHeight, inputWidth});
                  TensorShape colShape1 = TensorShape({channels,
                                                       filterHeight,
                                                       filterWidth,
                                                       outputHeight,
                                                       outputWidth});

                  // Dimensions of the full column matrix.
                  size_t height = channels * filterHeight * filterWidth;
                  size_t width = outputHeight * outputWidth;
                  VectorPtr input1 =
                      Vector::create(imShape.getElements(), false);
                  VectorPtr input2 =
                      Vector::create(imShape.getElements(), false);
                  MatrixPtr output1 =
                      Matrix::create(height, width, false, false);
                  MatrixPtr output2 =
                      Matrix::create(height, width, false, false);
                  // Both functors receive identical input data.
                  input1->uniform(0.001, 1);
                  input2->copyFrom(*input1);

                  Im2ColFunctor<kCFO, DEVICE_TYPE_CPU, T> im2Col1;
                  Im2ColMobileFunctor<T> im2Col2;
                  im2Col1(input1->getData(),
                          imShape,
                          output1->getData(),
                          colShape1,
                          stride,
                          stride,
                          padding,
                          padding,
                          dilation,
                          dilation);
                  im2Col2(input2->getData(),
                          imShape,
                          output2->getData(),
                          colShape1,
                          stride,
                          stride,
                          padding,
                          padding,
                          dilation,
                          dilation,
                          0,
                          height,
                          0,
                          width);

                  autotest::TensorCheckEqual(*output1, *output2);
                }
              }
            }
          }
        }
      }
    }
  }
}
// Runs the mobile im2col comparison test with float elements.
TEST
(
Im2ColFunctor
,
Mobile
)
{
TestIm2ColMobileFunctor
<
float
>
();
}
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录