Commit 72b2f5bd

Authored Sep 13, 2018 by zhangyang

modify Softmax for FPGA track

Parent: 5301c87a

Showing 14 changed files with 59 additions and 41 deletions
src/fpga/api.cpp                                           +24 -5
src/fpga/api.h                                             +3  -1
src/operators/feed_op.h                                    +5  -5
src/operators/kernel/fpga/conv_add_bn_kernel.cpp           +1  -1
src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp      +1  -1
src/operators/kernel/fpga/conv_add_relu_kernel.cpp         +1  -1
src/operators/kernel/fpga/conv_bn_kernel.cpp               +2  -5
src/operators/kernel/fpga/conv_bn_relu_kernel.cpp          +1  -1
src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp  +1  -1
src/operators/kernel/fpga/fc_relu_kernel.cpp               +1  -1
src/operators/kernel/fpga/fusion_fc_kernel.cpp             +1  -1
src/operators/kernel/fpga/pool_kernel.cpp                  +1  -1
src/operators/kernel/fpga/softmax_kernel.cpp               +16 -16
test/fpga/test_format_data.cpp                             +1  -1
src/fpga/api.cpp

@@ -181,10 +181,12 @@ int ComputeFPGAConcat(const struct ConcatArgs &args) {
   return 0;
 }
 
+int get_align_image_cw(int cw) { return align_to_x(cw, IMAGE_ALIGNMENT); }
+
 void format_image(framework::Tensor *image_tensor) {
   auto dims = image_tensor->dims();
   auto channel = dims[1], height = dims[2], width = dims[3];
-  auto data_ptr = image_tensor->mutable_data<float>();
+  auto data_ptr = image_tensor->data<float>();
   size_t memory_size = channel * height * width * sizeof(float);
   float *new_data = (float *)fpga_malloc(memory_size);
   fpga_copy(new_data, data_ptr, memory_size);
@@ -192,7 +194,7 @@ void format_image(framework::Tensor *image_tensor) {
   image_tensor->reset_data_ptr(new_data);
 }
 
-void format_ofm(framework::Tensor *ofm_tensor) {
+void format_fp16_ofm(framework::Tensor *ofm_tensor) {
   auto dims = ofm_tensor->dims();
   size_t memory_size = 0;
   if (dims.size() == 4) {
@@ -209,6 +211,23 @@ void format_ofm(framework::Tensor *ofm_tensor) {
   ofm_tensor->reset_data_ptr(p);
 }
 
+void format_fp32_ofm(framework::Tensor *ofm_tensor) {
+  auto dims = ofm_tensor->dims();
+  size_t memory_size = 0;
+  if (dims.size() == 4) {
+    auto channel = dims[1], height = dims[2], width = dims[3];
+    memory_size =
+        height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(float);
+  } else if (dims.size() == 2) {
+    memory_size = align_to_x(dims[1], IMAGE_ALIGNMENT) * sizeof(float);
+  } else {
+    DLOG << "Wrong ofm dimension";
+  }
+  auto p = fpga_malloc(memory_size);
+  memset(p, 0, memory_size);
+  ofm_tensor->reset_data_ptr(p);
+}
+
 float filter_find_max(framework::Tensor *filter_tensor) {
   auto filter_ptr = filter_tensor->data<float>();
   return filter::find_max(filter_ptr, filter_tensor->numel());
@@ -242,7 +261,7 @@ void format_filter(framework::Tensor *filter_tensor, float max_value,
                    int group_num) {
   auto dims = filter_tensor->dims();
   auto num = dims[0], channel = dims[1], height = dims[2], width = dims[3];
-  auto data_ptr = filter_tensor->mutable_data<float>();
+  auto data_ptr = filter_tensor->data<float>();
   size_t memory_size = num * channel * height * width * sizeof(float);
   auto new_data = (float *)fpga_malloc(memory_size);
   fpga_copy(new_data, data_ptr, memory_size);
@@ -277,7 +296,7 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
                    int padding_h, int padding_w, float *bs_ptr) {
   auto input_ptr = input->data<float>();
   auto filter_ptr = filter->data<float>();
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->data<float>();
 
   arg->group_num = (uint32_t)group_num;
   arg->split_num = (uint32_t)fpga::get_plit_num(filter);
@@ -300,7 +319,7 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
       (uint32_t *)fpga::fpga_malloc(n * sizeof(uint32_t));
   arg->concat_arg.image_out = out_ptr;
 
-  const int channel = (int)out->dims()[1];
+  auto channel = (int)out->dims()[1];
   int filter_num_per_div = fpga::get_filter_num_per_div(filter, group_num);
   int element_num = fpga::get_aligned_filter_element_num(
       filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
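Note on the new helpers: get_align_image_cw exposes the row-alignment rule (each channel * width row is padded up to a multiple of IMAGE_ALIGNMENT), and format_fp32_ofm sizes its buffer in sizeof(float) units rather than half-precision units. A standalone sketch of that arithmetic, assuming IMAGE_ALIGNMENT is 16 purely for illustration:

// Standalone sketch of the alignment math in api.cpp; IMAGE_ALIGNMENT = 16
// and the tensor dimensions are illustrative assumptions, not repo values.
#include <cstddef>
#include <cstdio>

constexpr int IMAGE_ALIGNMENT = 16;  // assumed value for this demo

// Round num up to the next multiple of x (same formula as align_to_x in api.h).
static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; }

int main() {
  // get_align_image_cw(cw) == align_to_x(cw, IMAGE_ALIGNMENT):
  // a row of channel * width = 100 elements is padded to 112 (= 7 * 16).
  printf("aligned cw = %d\n", align_to_x(100, IMAGE_ALIGNMENT));

  // format_fp32_ofm for a 4-D tensor: each of the `height` rows is aligned
  // independently, then the total is sized in fp32 units.
  int channel = 3, height = 224, width = 224;
  size_t fp32_bytes =
      (size_t)height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(float);
  printf("fp32 ofm bytes = %zu\n", fp32_bytes);
  return 0;
}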
src/fpga/api.h

@@ -206,8 +206,10 @@ int ComputeFPGAConcat(const struct ConcatArgs& args);
 static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; }
 
+int get_align_image_cw(int cw);
 void format_image(framework::Tensor *image_tensor);
-void format_ofm(framework::Tensor *ofm_tensor);       // only allocate memory
+void format_fp16_ofm(framework::Tensor *ofm_tensor);  // only allocate memory
+void format_fp32_ofm(framework::Tensor *ofm_tensor);
 float filter_find_max(framework::Tensor *filter_tensor);
 int get_filter_num_per_div(framework::Tensor *filter_tensor, int group_num);
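format_fp16_ofm keeps the old format_ofm behavior (a half-precision-sized allocation), while format_fp32_ofm is the new fp32-sized variant used to stage bypass output. get_align_image_cw lets callers compute the byte extent of one aligned row without duplicating the formula; the softmax kernel below uses exactly this to bound its cache invalidation. A one-line usage sketch taken from that pattern:

// Bytes covered by one aligned row of a 2-D fp32 feature map in_x
// (the same expression softmax_kernel.cpp passes to fpga_invalidate).
size_t row_bytes =
    (size_t)fpga::get_align_image_cw((int)in_x->dims()[1]) * sizeof(float);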
src/operators/feed_op.h

@@ -45,7 +45,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
   void Init() {
     Tensor *output = param_.Out();
-    fpga::format_ofm(output);
+    fpga::format_fp16_ofm(output);
   }
 
   void RunImpl() const {
@@ -53,7 +53,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
     auto input_ptr = input->data<float>();
     fpga::format_image(input);
     Tensor *output = param_.Out();
-    auto output_ptr = output->mutable_data<half>();
+    auto output_ptr = output->data<half>();
 
     fpga::BypassArgs args;
@@ -62,9 +62,9 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
     args.input_layout_type = fpga::LAYOUT_CHW;
     args.output_layout_type = fpga::LAYOUT_HWC;
     args.image.address = (void *)input_ptr;
-    args.image.channels = input->dims()[1];
-    args.image.height = input->dims()[2];
-    args.image.width = input->dims()[3];
+    args.image.channels = (uint32_t)input->dims()[1];
+    args.image.height = (uint32_t)input->dims()[2];
+    args.image.width = (uint32_t)input->dims()[3];
     args.image.pad_height = 0;
     args.image.pad_width = 0;
     args.output.address = output_ptr;
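For context, after this change FeedOp stages the fp32 CHW input through a bypass conversion into the fp16 HWC buffer reserved by format_fp16_ofm in Init(). A condensed sketch of the RunImpl flow; the final PerformBypass dispatch is an assumption here (mirroring how softmax_kernel.cpp consumes its BypassArgs), the rest is verbatim from the diff:

// Condensed FeedOp::RunImpl flow after this commit (sketch, not a full listing).
fpga::format_image(input);                         // pad input rows to IMAGE_ALIGNMENT
auto output_ptr = output->data<half>();            // buffer reserved by format_fp16_ofm

fpga::BypassArgs args;
args.input_layout_type = fpga::LAYOUT_CHW;         // framework layout in
args.output_layout_type = fpga::LAYOUT_HWC;        // FPGA layout out
args.image.address = (void *)input_ptr;
args.image.channels = (uint32_t)input->dims()[1];
args.image.height = (uint32_t)input->dims()[2];
args.image.width = (uint32_t)input->dims()[3];
args.output.address = output_ptr;
fpga::PerformBypass(args);                         // assumed dispatch, as in softmax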
src/operators/kernel/fpga/conv_add_bn_kernel.cpp

@@ -64,7 +64,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   int element_num_per_div =
       fpga::get_filter_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
-  fpga::format_ofm(out);
+  fpga::format_fp16_ofm(out);
 
   fpga::WrapperConvArgs conv_arg;
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
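The remaining kernel files below make the same one-line substitution, so the shared Init() shape is worth stating once. A simplified skeleton of that pattern, stitched together from the hunks in this commit (the fill_conv_arg argument names are placeholders matching its signature in api.cpp, and the SetFpgaArgs call is an assumption based on the softmax kernel):

// Common Init() pattern across the FPGA conv/fc kernels in this commit
// (simplified sketch; BN folding and parameter checks omitted).
int element_num_per_div = fpga::get_filter_num_per_div(filter, group_num);
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);

fpga::format_fp16_ofm(out);  // previously fpga::format_ofm(out)

fpga::WrapperConvArgs conv_arg;
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, group_num,
                    stride_h, stride_w, padding_h, padding_w, bs_ptr);
param->SetFpgaArgs(conv_arg);  // assumed, per the pattern in softmax_kernel.cpp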
src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp

@@ -62,7 +62,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
       fpga::get_filter_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
-  fpga::format_ofm(out);
+  fpga::format_fp16_ofm(out);
 
   fpga::WrapperConvArgs conv_arg;
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
src/operators/kernel/fpga/conv_add_relu_kernel.cpp

@@ -44,7 +44,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
       fpga::get_filter_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
-  fpga::format_ofm(out);
+  fpga::format_fp16_ofm(out);
 
   fpga::WrapperConvArgs conv_arg;
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
src/operators/kernel/fpga/conv_bn_kernel.cpp

@@ -15,7 +15,6 @@ limitations under the License. */
 #ifdef FUSION_CONVBN_OP
 
 #include "operators/kernel/conv_bn_kernel.h"
-#include "fpga/api.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -33,10 +32,8 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   const float epsilon = param->Epsilon();
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0],
                         "Output channel should be equal to bias number");
   const int channel = out->dims()[1];
-  auto bs_ptr =
-      reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
-
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   auto new_scale = new Tensor();
   auto new_bias = new Tensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({channel});
@@ -59,7 +56,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
       fpga::get_filter_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
-  fpga::format_ofm(out);
+  fpga::format_fp16_ofm(out);
 
   fpga::WrapperConvArgs conv_arg;
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
src/operators/kernel/fpga/conv_bn_relu_kernel.cpp

@@ -56,7 +56,7 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
       fpga::get_filter_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
-  fpga::format_ofm(out);
+  fpga::format_fp16_ofm(out);
 
   fpga::WrapperConvArgs conv_arg;
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp

@@ -27,7 +27,7 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   auto *out = param->Out();
   auto input_x_ptr = input_x->data<float>();
   auto input_y_ptr = input_y->data<float>();
-  fpga::format_ofm(out);
+  fpga::format_fp16_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
   fpga::EWAddArgs ewaddArgs;
src/operators/kernel/fpga/fc_relu_kernel.cpp

@@ -49,7 +49,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   int element_num_per_div = fpga::get_filter_num_per_div(filter, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
-  fpga::format_ofm(out);
+  fpga::format_fp16_ofm(out);
 
   fpga::WrapperConvArgs conv_arg;
   fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
src/operators/kernel/fpga/fusion_fc_kernel.cpp

@@ -50,7 +50,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   int element_num_per_div = fpga::get_filter_num_per_div(filter, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
-  fpga::format_ofm(out);
+  fpga::format_fp16_ofm(out);
 
   fpga::WrapperConvArgs conv_arg;
   fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
src/operators/kernel/fpga/pool_kernel.cpp

@@ -24,7 +24,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
   auto *input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
   Tensor *output = param->Output();
-  fpga::format_ofm(output);
+  fpga::format_fp16_ofm(output);
   auto output_ptr = output->mutable_data<float>();
   vector<int> ksize = param->Ksize();
   vector<int> strides = param->Strides();
src/operators/kernel/fpga/softmax_kernel.cpp

@@ -24,22 +24,23 @@ namespace operators {
 template <>
 bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
-  const Tensor *input = param->InputX();
+  auto input = const_cast<Tensor *>(param->InputX());
   auto input_ptr = input->data<float>();
   auto output_ptr = param->Out();
-  Tensor *floatInput = new Tensor(*input);
+  auto float_input = new Tensor(*input);
+  fpga::format_fp32_ofm(float_input);
 
   fpga::BypassArgs args;
   args.input_layout_type = fpga::LAYOUT_HWC;
   args.output_layout_type = fpga::LAYOUT_CHW;
   args.input_data_type = fpga::DATA_TYPE_FP16;
   args.output_data_type = fpga::DATA_TYPE_FP32;
-  args.image.address = (void *)(input_ptr);
-  args.image.height = (uint32_t)input->dims()[0];
-  args.image.width = (uint32_t)input->dims()[1];
-  args.image.channels = 1;
-  args.output.address = (void *)floatInput->mutable_data<float>();
+  args.image.address = input_ptr;
+  args.image.height = 1;
+  args.image.width = 1;
+  args.image.channels = (uint32_t)input->dims()[1];
+  args.output.address = float_input->mutable_data<float>();
-  param->SetFloatInput(floatInput);
+  param->SetFloatInput(float_input);
   param->SetFpgaArgs(args);
 
   return true;
 }
@@ -47,17 +48,16 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
 template <>
 void SoftmaxKernel<FPGA, float>::Compute(
     const SoftmaxParam<FPGA> &param) const {
-  DLOG << "======================================= FPGA SoftMAX "
-          "===============================================";
-  const Tensor *in_x = param.FloatInput();
+  Tensor *in_x = param.FloatInput();
   Tensor *out = param.Out();
-  fpga::fpga_flush((void *)in_x->data<float>(), in_x->memory_size());
   fpga::PerformBypass(param.FpgaArgs());
-  fpga::fpga_invalidate(out->data<float>(), out->memory_size());
+  fpga::fpga_invalidate(
+      (void *)in_x->data<float>(),
+      (size_t)fpga::get_align_image_cw((int)in_x->dims()[1]) * sizeof(float));
 
   auto x_dims = in_x->dims();
   out->Resize(x_dims);
   math::SoftmaxFuntor<CPU, float>()(in_x, out);
   fpga::fpga_flush(out->data<float>(), out->memory_size());
 }
 
 }  // namespace operators
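Taken together, the softmax rewrite works like this: the incoming fp16 HWC activation is treated as a 1 x 1 x C image, a bypass pass expands it into the fp32 float_input tensor reserved by format_fp32_ofm, the CPU softmax runs on that buffer, and the result is flushed back for the next consumer. A condensed sketch of the Compute-side sequence (all calls appear in the diff above; the comments are interpretation):

// Condensed FPGA softmax data path after this commit (interpretive comments).
Tensor *in_x = param.FloatInput();             // fp32 staging tensor from Init()
Tensor *out = param.Out();

fpga::PerformBypass(param.FpgaArgs());         // FPGA: fp16 HWC -> fp32 CHW into in_x
fpga::fpga_invalidate(                         // make the DMA-written row CPU-visible
    (void *)in_x->data<float>(),
    (size_t)fpga::get_align_image_cw((int)in_x->dims()[1]) * sizeof(float));

out->Resize(in_x->dims());
math::SoftmaxFuntor<CPU, float>()(in_x, out);  // softmax itself still runs on the CPU
fpga::fpga_flush(out->data<float>(), out->memory_size());  // publish result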
test/fpga/test_format_data.cpp

@@ -71,7 +71,7 @@ void test_fill_conv_arg() {
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, 1001);
 
   DLOG << "format ofm";
-  fpga::format_ofm(&out);
+  fpga::format_fp16_ofm(&out);
 
   DLOG << "Build arg";
   fpga::WrapperConvArgs arg;