Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
a1cc931d
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a1cc931d
编写于
5月 08, 2019
作者:
J
jameswu2014
提交者:
qnqinan
5月 08, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
V2-conv-hellocase pass & V1 verify-pass (#1608)
上级
64aa8f05
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
355 addition
and
287 deletion
+355
-287
src/fpga/V2/api.cpp
src/fpga/V2/api.cpp
+94
-56
src/fpga/V2/api.h
src/fpga/V2/api.h
+8
-16
src/fpga/V2/pe.cpp
src/fpga/V2/pe.cpp
+127
-111
src/fpga/common/fpga_common.cpp
src/fpga/common/fpga_common.cpp
+3
-3
src/fpga/common/fpga_common.h
src/fpga/common/fpga_common.h
+48
-19
src/framework/executor.cpp
src/framework/executor.cpp
+3
-1
src/operators/kernel/fpga/V2/conv_add_bn_kernel.cpp
src/operators/kernel/fpga/V2/conv_add_bn_kernel.cpp
+6
-5
src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
+10
-10
src/operators/kernel/fpga/V2/conv_add_kernel.cpp
src/operators/kernel/fpga/V2/conv_add_kernel.cpp
+2
-3
src/operators/kernel/fpga/V2/conv_add_relu_kernel.cpp
src/operators/kernel/fpga/V2/conv_add_relu_kernel.cpp
+2
-3
src/operators/kernel/fpga/V2/conv_bn_kernel.cpp
src/operators/kernel/fpga/V2/conv_bn_kernel.cpp
+2
-3
src/operators/kernel/fpga/V2/conv_bn_relu_kernel.cpp
src/operators/kernel/fpga/V2/conv_bn_relu_kernel.cpp
+6
-7
src/operators/kernel/fpga/V2/conv_kernel.cpp
src/operators/kernel/fpga/V2/conv_kernel.cpp
+2
-3
src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp
src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp
+6
-7
src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp
src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp
+6
-7
src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp
src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp
+6
-7
src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
+6
-7
src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp
src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp
+6
-7
src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp
src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp
+6
-7
src/operators/kernel/fpga/V2/feed_kernel.cpp
src/operators/kernel/fpga/V2/feed_kernel.cpp
+0
-1
src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp
src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp
+3
-2
src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp
src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp
+3
-2
未找到文件。
src/fpga/V2/api.cpp
浏览文件 @
a1cc931d
...
...
@@ -22,6 +22,7 @@ limitations under the License. */
namespace
paddle_mobile
{
namespace
fpga
{
#define USE_RELU 1
#define USE_BIAS 2
void
format_image
(
framework
::
Tensor
*
image_tensor
)
{
...
...
@@ -301,7 +302,9 @@ void expand_conv_arg(ConvArgs *arg) {
ConvArgs
args
=
*
arg
;
auto
fpga_bias_scale_len
=
align_to_x
(
args
.
filter_num
/
args
.
group_num
,
8
)
*
args
.
group_num
;
align_to_x
(
args
.
filter_num
/
args
.
group_num
,
BS_NUM_ALIGNMENT
)
*
args
.
group_num
;
fpga_bias_scale_len
=
fpga_bias_scale_len
/
BIAS_SCALE_DMA_NUM
;
auto
output_height
=
(
args
.
image
.
height
+
args
.
image
.
pad_height
*
2
-
args
.
kernel
.
height
)
/
...
...
@@ -325,7 +328,7 @@ void expand_conv_arg(ConvArgs *arg) {
auto
output_amount_per_row
=
align_to_x
(
(
output_width
-
(
args
.
deconv_tx_param
.
omit_size
)
*
2
)
*
args
.
filter_num
,
IMAGE
_ALIGNMENT
);
RESULT
_ALIGNMENT
);
// find the opt partition strategy
uint64_t
res_win
;
...
...
@@ -335,10 +338,10 @@ void expand_conv_arg(ConvArgs *arg) {
(
args
.
image
.
channels
*
(
args
.
kernel
.
width
+
(
res_win
-
1
)
*
args
.
kernel
.
stride_w
)),
IMAGE_ALIGNMENT
)
/
16
+
IMAGE_ALIGNMENT
+
1
)
*
args
.
kernel
.
height
>
2
048
)
{
2
56
)
{
break
;
}
}
...
...
@@ -350,6 +353,7 @@ void expand_conv_arg(ConvArgs *arg) {
if
(((
res_win
%
2
)
!=
0
)
&&
(
res_win
!=
1
))
{
res_win
=
res_win
-
1
;
}
PADDLE_MOBILE_ENFORCE
(
res_win
>=
2
,
"window too bigger than fpga volume"
);
res_fit
=
res_win
;
auto
block_num
=
(
output_width
+
res_fit
-
1
)
/
res_fit
;
...
...
@@ -375,14 +379,14 @@ void expand_conv_arg(ConvArgs *arg) {
align_to_x
((
args
.
image
.
channels
*
(
args
.
kernel
.
width
+
(
block_len
-
1
)
*
args
.
kernel
.
stride_w
)),
IMAGE_ALIGNMENT
)
/
16
+
IMAGE_ALIGNMENT
+
1
;
auto
image_block_len_last
=
align_to_x
(
(
args
.
image
.
channels
*
(
args
.
kernel
.
width
+
(
block_last
-
1
)
*
args
.
kernel
.
stride_w
)),
IMAGE_ALIGNMENT
)
/
16
+
IMAGE_ALIGNMENT
+
1
;
auto
image_win_cnt
=
block_len
;
auto
image_win_cnt_last
=
block_last
;
...
...
@@ -395,46 +399,85 @@ void expand_conv_arg(ConvArgs *arg) {
(
512
/
(
align_to_x
(
args
.
filter_num
,
4
)
/
4
*
2
)
>
2
)
?
(
512
/
(
align_to_x
(
args
.
filter_num
,
4
)
/
4
*
2
)
-
2
)
:
0
;
//
auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
auto
cmd
=
0UL
|
USE_BIAS
;
auto
cmd
=
0UL
|
(
args
.
relu_enabled
?
USE_RELU
:
0
)
|
USE_BIAS
;
//
auto cmd = 0UL | USE_BIAS;
auto
deconv_param
=
((
args
.
deconv_tx_param
.
deconv_en
)
<<
16
)
|
((
args
.
deconv_tx_param
.
sub_conv_num
)
<<
8
)
|
((
args
.
deconv_tx_param
.
omit_size
)
<<
0
);
(
*
arg
).
driver
.
image_address_phy
=
vaddr_to_paddr
(
args
.
image
.
address
);
(
*
arg
).
driver
.
sb_address_phy
=
vaddr_to_paddr
(
args
.
sb_address
);
(
*
arg
).
driver
.
filter_address_phy
=
vaddr_to_paddr
(
args
.
filter_address
);
(
*
arg
).
driver
.
output_address_phy
=
vaddr_to_paddr
(
args
.
output
.
address
)
+
args
.
deconv_tx_param
.
out_addr_offset
;
(
*
arg
).
driver
.
output_height
=
output_height
;
(
*
arg
).
driver
.
output_width
=
output_width
;
(
*
arg
).
driver
.
filter_per_group
=
filter_per_group
;
(
*
arg
).
driver
.
channel_per_group
=
channel_per_group
;
(
*
arg
).
driver
.
image_amount_per_row
=
image_amount_per_row
;
(
*
arg
).
driver
.
image_one_pad_per_row
=
image_one_pad_per_row
;
(
*
arg
).
driver
.
filter_amount_all
=
filter_amount_all
;
(
*
arg
).
driver
.
output_amount_per_row
=
output_amount_per_row
;
(
*
arg
).
driver
.
deconv_param
=
deconv_param
;
// new
(
*
arg
).
driver
.
col_padding_up
=
args
.
image
.
pad_width
*
args
.
image
.
channels
;
(
*
arg
).
driver
.
col_padding_down
=
image_one_pad_per_row
;
(
*
arg
).
driver
.
row_padding_up
=
args
.
image
.
pad_height
;
(
*
arg
).
driver
.
row_padding_down
=
args
.
image
.
pad_height
+
args
.
image
.
height
;
(
*
arg
).
driver
.
image_block_amount_per_row
=
image_block_amount_per_row
;
(
*
arg
).
driver
.
filter_pad_width_mul_channel
=
filter_pad_width_mul_channel
;
(
*
arg
).
driver
.
image_win_cnt
=
image_win_cnt
;
(
*
arg
).
driver
.
image_win_cnt_last
=
image_win_cnt_last
;
(
*
arg
).
driver
.
filter_row
=
args
.
kernel
.
width
*
args
.
image
.
channels
;
(
*
arg
).
driver
.
filter_width
=
args
.
kernel
.
width
;
(
*
arg
).
driver
.
filter_height
=
args
.
kernel
.
height
;
(
*
arg
).
driver
.
skip_window
=
args
.
image
.
channels
*
args
.
kernel
.
stride_w
;
(
*
arg
).
driver
.
stride_h
=
args
.
kernel
.
stride_h
;
(
*
arg
).
driver
.
filter_amount_all
=
filter_amount_all
;
(
*
arg
).
driver
.
prog_full_cnt
=
prog_full_cnt
;
(
*
arg
).
driver
.
filter_align
=
args
.
filter_num
/
(
4
*
PE_COLUMN
)
+
(((
args
.
filter_num
%
(
4
*
PE_COLUMN
)))
?
1
:
0
);
(
*
arg
).
driver
.
filter_num
=
args
.
filter_num
;
(
*
arg
).
driver
.
output_width
=
output_width
;
(
*
arg
).
driver
.
output_amount_per_row
=
output_amount_per_row
;
(
*
arg
).
driver
.
res_row_data_align4_pad
=
res_row_data_align4_pad
;
(
*
arg
).
driver
.
cal_res_num
=
output_height
/
ROW_PARALLEL_NUM
+
((
output_height
%
ROW_PARALLEL_NUM
)
?
1
:
0
)
-
1
;
(
*
arg
).
driver
.
last_cal_res_row_num
=
(
output_height
%
(
ROW_PARALLEL_NUM
))
?
(
output_height
%
(
ROW_PARALLEL_NUM
))
:
(
ROW_PARALLEL_NUM
);
(
*
arg
).
driver
.
post_prog_full_cnt
=
post_prog_full_cnt
;
(
*
arg
).
driver
.
deconv_skip_row
=
ROW_PARALLEL_NUM
*
args
.
deconv_tx_param
.
sub_conv_num
;
// paralvl*deconv_group
(
*
arg
).
driver
.
deconv_res_skip_row
=
args
.
deconv_tx_param
.
sub_conv_num
*
output_amount_per_row
;
// deconv_group * result_amount_per_row
(
*
arg
).
driver
.
deconv_ena
=
args
.
deconv_tx_param
.
deconv_en
;
(
*
arg
).
driver
.
deconv_dump
=
args
.
deconv_tx_param
.
omit_size
;
(
*
arg
).
driver
.
output_address_phy
=
vaddr_to_paddr
(
args
.
output
.
address
)
+
args
.
deconv_tx_param
.
out_addr_offset
;
(
*
arg
).
driver
.
output_height
=
output_height
;
(
*
arg
).
driver
.
result_amount_per_row_multi_para
=
output_amount_per_row
/
RESULT_ALIGNMENT
*
(
args
.
deconv_tx_param
.
deconv_en
?
(
*
arg
).
driver
.
deconv_skip_row
:
ROW_PARALLEL_NUM
);
(
*
arg
).
driver
.
sb_address_phy
=
vaddr_to_paddr
(
args
.
sb_address
);
(
*
arg
).
driver
.
fpga_bias_scale_len
=
fpga_bias_scale_len
;
(
*
arg
).
driver
.
filter_amount_whole
=
filter_amount_all
;
(
*
arg
).
driver
.
filter_address_phy
=
vaddr_to_paddr
(
args
.
filter_address
);
(
*
arg
).
driver
.
filters_amount_whole
=
filter_amount_all
*
(
*
arg
).
driver
.
filter_align
*
(
4
*
PE_COLUMN
);
(
*
arg
).
driver
.
image_address_phy
=
vaddr_to_paddr
(
args
.
image
.
address
);
(
*
arg
).
driver
.
image_hight
=
args
.
image
.
height
;
(
*
arg
).
driver
.
image_amount_per_row
=
image_amount_per_row
;
(
*
arg
).
driver
.
image_amount_per_row_multi_win_first
=
image_amount_per_row_multi_win_first
;
(
*
arg
).
driver
.
image_amount_per_row_multi_win
=
image_amount_per_row_multi_win
;
(
*
arg
).
driver
.
filter_pad_hight
=
args
.
image
.
pad_height
;
(
*
arg
).
driver
.
image_block_num
=
image_block_num
;
(
*
arg
).
driver
.
image_block_len
=
image_block_len
;
(
*
arg
).
driver
.
image_block_len_last
=
image_block_len_last
;
(
*
arg
).
driver
.
image_win_cnt
=
image_win_cnt
;
(
*
arg
).
driver
.
image_win_cnt_last
=
image_win_cnt_last
;
(
*
arg
).
driver
.
res_row_data_align4_pad
=
res_row_data_align4_pad
;
(
*
arg
).
driver
.
prog_full_cnt
=
prog_full_cnt
;
(
*
arg
).
driver
.
post_prog_full_cnt
=
post_prog_full_cnt
;
(
*
arg
).
driver
.
fpga_bias_scale_len
=
fpga_bias_scale_len
;
(
*
arg
).
driver
.
cmd
=
cmd
;
(
*
arg
).
driver
.
deconv_param
=
deconv_param
;
}
// expand_conv_arg()
void
expand_EW_arg
(
EWAddArgs
*
arg
)
{
EWAddArgs
args
=
*
arg
;
uint64_t
cmd
=
0
;
uint64_t
cmd
=
args
.
relu_enabled
?
USE_RELU
:
0
;
uint64_t
datalen
=
(
uint64_t
)
args
.
image0
.
width
*
(
uint64_t
)
args
.
image0
.
height
*
(
uint64_t
)
args
.
image0
.
channels
;
...
...
@@ -462,10 +505,8 @@ void expand_EW_arg(EWAddArgs *arg) {
void
fill_split_arg
(
struct
SplitConvArgs
*
arg
,
framework
::
Tensor
*
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
ActivationType
activation_enable
,
int16_t
leaky_relu_negative_slope
,
int
group_num
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bs_ptr
)
{
bool
relu_enabled
,
int
group_num
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bs_ptr
)
{
auto
input_ptr
=
input
->
data
<
int8_t
>
();
auto
filter_ptr
=
filter
->
data
<
int8_t
>
();
auto
out_ptr
=
out
->
data
<
int8_t
>
();
...
...
@@ -473,6 +514,7 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
arg
->
group_num
=
(
uint32_t
)
group_num
;
// Either group_num or split_num = 1;
PADDLE_MOBILE_ENFORCE
(
group_num
==
1
,
"group_num is not equal to 1"
);
arg
->
split_num
=
group_num
==
1
?
(
uint32_t
)
get_plit_num
(
filter
)
:
1
;
arg
->
filter_num
=
(
uint32_t
)
filter
->
dims
()[
0
];
arg
->
output
.
address
=
out_ptr
;
...
...
@@ -511,9 +553,7 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
filter
->
dims
()[
3
]));
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
arg
->
conv_arg
[
i
].
output
.
activation
.
activation_type
=
activation_enable
;
arg
->
conv_arg
[
i
].
output
.
activation
.
leaky_relu_negative_slope
=
leaky_relu_negative_slope
;
arg
->
conv_arg
[
i
].
relu_enabled
=
relu_enabled
;
arg
->
conv_arg
[
i
].
group_num
=
(
uint32_t
)
group_num
;
arg
->
conv_arg
[
i
].
kernel
.
stride_h
=
(
uint32_t
)
stride_h
;
arg
->
conv_arg
[
i
].
kernel
.
stride_w
=
(
uint32_t
)
stride_w
;
...
...
@@ -585,9 +625,8 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
void
fill_deconv_arg
(
struct
DeconvArgs
*
arg
,
framework
::
Tensor
*
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
ActivationType
activation_enable
,
int16_t
leaky_relu_negative_slope
,
int
group_num
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
bool
relu_enabled
,
int
group_num
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bs_ptr
)
{
auto
input_ptr
=
input
->
data
<
int8_t
>
();
auto
filter_ptr
=
filter
->
data
<
int8_t
>
();
...
...
@@ -713,12 +752,14 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
}
for
(
int
j
=
0
;
j
<
split_num
;
++
j
)
{
arg
->
split_conv_args
[
i
]
->
conv_arg
[
j
].
output
.
activation
.
activation_type
=
activation_enable
;
arg
->
split_conv_args
[
i
]
->
conv_arg
[
j
]
.
output
.
activation
.
leaky_relu_negative_slope
=
leaky_relu_negative_slope
;
// arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type
// =
// activation_enable;
// arg->split_conv_args[i]
// ->conv_arg[j]
// .output.activation.leaky_relu_negative_slope =
// leaky_relu_negative_slope;
arg
->
split_conv_args
[
i
]
->
conv_arg
[
j
].
relu_enabled
=
relu_enabled
;
arg
->
split_conv_args
[
i
]
->
conv_arg
[
j
].
group_num
=
(
uint32_t
)
group_num
;
arg
->
split_conv_args
[
i
]
->
conv_arg
[
j
].
kernel
.
width
=
...
...
@@ -831,16 +872,14 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
void
fill_dwconv_arg
(
struct
DWconvArgs
*
arg
,
framework
::
Tensor
*
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
ActivationType
activation_enable
,
int16_t
leaky_relu_negative_slope
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bias_ptr
)
{
bool
relu_enabled
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bias_ptr
)
{
auto
filter_ptr
=
filter
->
data
<
int16_t
>
();
auto
input_ptr
=
input
->
data
<
int8_t
>
();
auto
output_ptr
=
out
->
mutable_data
<
int8_t
>
();
arg
->
sub_conv_num
=
1
;
arg
->
output
.
activation
.
activation_type
=
activation_enable
;
arg
->
output
.
activation
.
leaky_relu_negative_slope
=
leaky_relu_negative_slop
e
;
arg
->
relu_enabled
=
relu_enabled
;
// arg->output.activation.activation_type = activation_enabl
e;
arg
->
bias_address
=
bias_ptr
;
arg
->
filter_address
=
filter_ptr
;
arg
->
kernel
.
height
=
(
uint32_t
)
filter
->
dims
()[
2
];
...
...
@@ -860,10 +899,8 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
void
fill_DWDeconv_arg
(
struct
DWDeconvArgs
*
arg
,
framework
::
Tensor
*
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
ActivationType
activation_enable
,
int16_t
leaky_relu_negative_slope
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bias_ptr
)
{
bool
relu_enabled
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bias_ptr
)
{
auto
filter_ptr
=
filter
->
data
<
int8_t
>
();
auto
input_ptr
=
input
->
data
<
int8_t
>
();
...
...
@@ -913,10 +950,11 @@ void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
arg
->
dw_conv_args
.
push_back
(
std
::
make_shared
<
DWconvArgs
>
());
arg
->
dw_conv_args
[
i
]
->
sub_conv_num
=
sub_conv_num
;
// arg->dw_conv_args[i]->relu_enabled = relu_enabled;
arg
->
dw_conv_args
[
i
]
->
output
.
activation
.
activation_type
=
activation_enable
;
arg
->
dw_conv_args
[
i
]
->
output
.
activation
.
leaky_relu_negative_slope
=
leaky_relu_negative_slope
;
arg
->
dw_conv_args
[
i
]
->
relu_enabled
=
relu_enabled
;
// arg->dw_conv_args[i]->output.activation.activation_type =
// activation_enable;
// arg->dw_conv_args[i]->output.activation.leaky_relu_negative_slope =
// leaky_relu_negative_slope;
arg
->
dw_conv_args
[
i
]
->
bias_address
=
bias_ptr
;
arg
->
dw_conv_args
[
i
]
->
filter_address
=
...
...
src/fpga/V2/api.h
浏览文件 @
a1cc931d
...
...
@@ -48,28 +48,20 @@ void format_concat_output(framework::Tensor* out, int height, int width,
void
fill_split_arg
(
struct
SplitConvArgs
*
arg
,
framework
::
Tensor
*
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
ActivationType
activation_enable
,
int16_t
leaky_relu_negative_slope
,
int
group_num
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bs_ptr
);
bool
relu_enabled
,
int
group_num
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bs_ptr
);
void
fill_deconv_arg
(
struct
DeconvArgs
*
arg
,
framework
::
Tensor
*
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
ActivationType
activation_enable
,
int16_t
leaky_relu_negative_slope
,
int
group_num
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bs_ptr
);
bool
relu_enabled
,
int
group_num
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bs_ptr
);
void
fill_dwconv_arg
(
struct
DWconvArgs
*
arg
,
framework
::
Tensor
*
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
ActivationType
activation_enable
,
int16_t
leaky_relu_negative_slope
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bias_ptr
);
bool
relu_enabled
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bias_ptr
);
void
fill_DWDeconv_arg
(
struct
DWDeconvArgs
*
arg
,
framework
::
Tensor
*
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
ActivationType
activation_enable
,
int16_t
leaky_relu_negative_slope
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bs_ptr
);
bool
relu_enabled
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bs_ptr
);
void
format_deconv_filter
(
framework
::
Tensor
*
filter_tensor
,
float
max_value
,
int
group_num
,
int
stride
);
...
...
src/fpga/V2/pe.cpp
浏览文件 @
a1cc931d
...
...
@@ -115,6 +115,19 @@ using namespace std; // NOLINT
/*conv*/
#define REG_CONV_CMD 0xC00
#define REG_CONV_REG0 0xC08
#define REG_CONV_REG1 0xC10
#define REG_CONV_REG2 0xC18
#define REG_CONV_REG3 0xC20
#define REG_CONV_REG4 0xC28
#define REG_CONV_REG5 0xC30
#define REG_CONV_REG6 0xC38
#define REG_CONV_REG7 0xC40
#define REG_CONV_REG8 0xC48
#define REG_CONV_REG9 0xC50
#define REG_CONV_REG10 0xC58
#define REG_CONV_REG11 0xC60
#define REG_CONV_IMAGE_BASE_ADDR 0xC08
#define REG_CONV_FILTER_BASE_ADDR 0xC10
#define REG_CONV_SB_BASE_ADDR 0xC18
...
...
@@ -194,7 +207,7 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) {
int
ComputeBasicConv
(
const
struct
ConvArgs
&
args
)
{
#ifdef FPGA_PRINT_MODE
DLOG
<<
"======Compute Basic Conv======"
;
// DLOG << " relu_enabled:" << args.relu_enabled
DLOG
<<
" relu_enabled:"
<<
args
.
relu_enabled
;
DLOG
<<
" sb_address:"
<<
args
.
sb_address
<<
" filter_address:"
<<
args
.
filter_address
<<
" filter_num:"
<<
args
.
filter_num
...
...
@@ -218,23 +231,23 @@ int ComputeBasicConv(const struct ConvArgs &args) {
int
ret
=
0
;
uint64_t
output_scale
=
0
;
uint64_t
reg_ActivationArgs
=
0
;
//
uint64_t reg_ActivationArgs = 0;
// active function:{none,leakeyrelu,sigmoid,tanh}
ActivationArgs
active_args
;
//
ActivationArgs active_args;
// active_args.activation_type = LEAKYRELU;
active_args
.
activation_type
=
args
.
output
.
activation
.
activation_type
;
//
active_args.activation_type = args.output.activation.activation_type;
active_args
.
leaky_relu_negative_slope
=
args
.
output
.
activation
.
leaky_relu_negative_slope
;
//
active_args.leaky_relu_negative_slope =
//
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs
=
(
uint64_t
(
active_args
.
activation_type
)
<<
32
)
|
active_args
.
leaky_relu_negative_slope
;
//
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
//
active_args.leaky_relu_negative_slope;
DLOG
<<
" activation_type:"
<<
active_args
.
activation_type
<<
" leaky_relu_negative_slope:"
<<
active_args
.
leaky_relu_negative_slope
;
DLOG
<<
" reg_ActivationArgs:"
<<
reg_ActivationArgs
;
//
DLOG << " activation_type:" << active_args.activation_type
//
<< " leaky_relu_negative_slope:"
//
<< active_args.leaky_relu_negative_slope;
//
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
pthread_mutex_lock
(
&
g_fpgainfo
.
pe_data
->
mutex
);
if
(
ERROR
==
g_fpgainfo
.
pe_data
->
pes
[
PE_IDX_CONV
]
->
status
)
{
...
...
@@ -243,63 +256,71 @@ int ComputeBasicConv(const struct ConvArgs &args) {
pthread_mutex_unlock
(
&
g_fpgainfo
.
pe_data
->
mutex
);
return
ret
;
}
// new
reg_writeq
((
args
.
driver
.
row_padding_down
<<
45
)
|
(
args
.
driver
.
row_padding_up
<<
34
)
|
(
args
.
driver
.
col_padding_down
<<
17
)
|
args
.
driver
.
col_padding_up
,
REG_CONV_REG0
);
reg_writeq
((
args
.
driver
.
image_win_cnt_last
<<
50
)
|
(
args
.
driver
.
image_win_cnt
<<
39
)
|
(
args
.
driver
.
image_block_amount_per_row
<<
20
)
|
args
.
driver
.
filter_pad_width_mul_channel
,
REG_CONV_REG1
);
reg_writeq
((
args
.
driver
.
stride_h
<<
48
)
|
(
args
.
driver
.
skip_window
<<
28
)
|
(
args
.
driver
.
filter_row
<<
8
)
|
(
args
.
driver
.
filter_height
<<
4
)
|
args
.
driver
.
filter_width
,
REG_CONV_REG2
);
reg_writeq
((
args
.
driver
.
filter_num
<<
42
)
|
(
args
.
driver
.
filter_align
<<
26
)
|
(
args
.
driver
.
prog_full_cnt
<<
16
)
|
args
.
driver
.
filter_amount_all
,
REG_CONV_REG3
);
reg_writeq
((
args
.
driver
.
post_prog_full_cnt
<<
54
)
|
(
args
.
driver
.
last_cal_res_row_num
<<
50
)
|
(
args
.
driver
.
cal_res_num
<<
39
)
|
(
args
.
driver
.
res_row_data_align4_pad
<<
35
)
|
(
args
.
driver
.
output_amount_per_row
<<
16
)
|
args
.
driver
.
output_width
,
REG_CONV_REG4
);
reg_writeq
((
args
.
driver
.
deconv_dump
<<
40
)
|
(
args
.
driver
.
deconv_ena
<<
39
)
|
(
args
.
driver
.
deconv_res_skip_row
<<
7
)
|
args
.
driver
.
deconv_skip_row
,
REG_CONV_REG5
);
reg_writeq
((
args
.
driver
.
result_amount_per_row_multi_para
<<
43
)
|
(
args
.
driver
.
output_height
<<
32
)
|
args
.
driver
.
output_address_phy
,
REG_CONV_REG6
);
reg_writeq
((
args
.
driver
.
filter_amount_whole
<<
48
)
|
(
args
.
driver
.
fpga_bias_scale_len
<<
32
)
|
args
.
driver
.
sb_address_phy
,
REG_CONV_REG7
);
reg_writeq
(
reg_ActivationArgs
,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR
);
// active functoion
reg_writeq
(
output_scale
,
REG_SCALE_PARAMETER
);
reg_writeq
(
((
uint64_t
)
args
.
image
.
height
)
|
(((
uint64_t
)
args
.
image
.
width
)
<<
32
),
REG_CONV_IMAGE_PIXEL
);
reg_writeq
(
((
uint64_t
)
args
.
kernel
.
height
)
|
(((
uint64_t
)
args
.
kernel
.
width
)
<<
32
),
REG_CONV_FILTER_PIXEL
);
uint64_t
output_height_fraction
=
args
.
driver
.
output_height
/
ROW_PARALLEL_NUM
;
uint64_t
output_height_remainder
=
args
.
driver
.
output_height
%
ROW_PARALLEL_NUM
;
reg_writeq
(
args
.
driver
.
output_height
|
(
output_height_fraction
<<
16
)
|
(
output_height_remainder
<<
26
)
|
(
args
.
driver
.
output_width
<<
32
),
REG_CONV_RESULT_PIXEL
);
reg_writeq
(((
uint64_t
)
args
.
image
.
pad_height
)
|
(((
uint64_t
)
args
.
image
.
pad_width
)
<<
32
),
REG_CONV_PAD_PIXEL
);
reg_writeq
(((
uint64_t
)
args
.
kernel
.
stride_h
)
|
(((
uint64_t
)
args
.
kernel
.
stride_w
)
<<
32
),
REG_CONV_STEP_PIXEL
);
reg_writeq
((
uint64_t
)
args
.
group_num
,
REG_CONV_GROUP_NUMBER
);
reg_writeq
((
uint64_t
)
args
.
filter_num
,
REG_CONV_FILTER_NUMBER
);
reg_writeq
((
uint64_t
)
args
.
image
.
channels
,
REG_CONV_CHANNEL_NUMBER
);
reg_writeq
(
*
(
uint64_t
*
)
args
.
image
.
scale_address
,
// NOLINT
REG_CONV_IMAGE_SCALE
);
reg_writeq
(
*
(
uint64_t
*
)
args
.
filter_scale_address
,
// NOLINT
REG_CONV_FILTER_SCALE
);
reg_writeq
(
args
.
driver
.
image_address_phy
,
REG_CONV_IMAGE_BASE_ADDR
);
reg_writeq
(
args
.
driver
.
filter_address_phy
,
REG_CONV_FILTER_BASE_ADDR
);
reg_writeq
(
args
.
driver
.
sb_address_phy
,
REG_CONV_SB_BASE_ADDR
);
reg_writeq
(
args
.
driver
.
output_address_phy
,
REG_CONV_RESULT_BASE_ADDR
);
reg_writeq
(
args
.
driver
.
filter_per_group
,
REG_CONV_FILTER_PER_GROUP
);
reg_writeq
(
args
.
driver
.
channel_per_group
,
REG_CONV_CHANNEL_PER_GROUP
);
reg_writeq
(
args
.
driver
.
image_amount_per_row
,
REG_CONV_IMAGE_AMOUNT_PER_ROW
);
reg_writeq
(
args
.
driver
.
image_one_pad_per_row
,
REG_CONV_IMAGE_ONE_PAD_PER_ROW
);
reg_writeq
(
args
.
driver
.
filter_amount_all
,
REG_CONV_FILTER_AMOUNT_ALL
);
reg_writeq
(
args
.
driver
.
output_amount_per_row
,
REG_CONV_RESULT_AMOUNT_PER_ROW
);
reg_writeq
(
args
.
driver
.
image_block_amount_per_row
,
0xca8
);
reg_writeq
(
args
.
driver
.
filter_pad_width_mul_channel
,
0xcb0
);
reg_writeq
(
args
.
driver
.
image_amount_per_row_multi_win_first
,
0xcb8
);
reg_writeq
(
args
.
driver
.
image_amount_per_row_multi_win
,
0xcc0
);
reg_writeq
(
args
.
driver
.
image_block_num
,
0xcc8
);
reg_writeq
(
args
.
driver
.
image_block_len
,
0xcd0
);
reg_writeq
(
args
.
driver
.
image_block_len_last
,
0xcd8
);
reg_writeq
(
args
.
driver
.
image_win_cnt
,
0xce0
);
reg_writeq
(
args
.
driver
.
image_win_cnt_last
,
0xce8
);
reg_writeq
(
args
.
driver
.
res_row_data_align4_pad
,
0xcf8
);
reg_writeq
(
args
.
driver
.
prog_full_cnt
,
0xd08
);
reg_writeq
(
args
.
driver
.
post_prog_full_cnt
,
0xd10
);
reg_writeq
(
args
.
driver
.
deconv_param
,
0xd18
);
reg_writeq
(
args
.
driver
.
fpga_bias_scale_len
/
4
,
0xd20
);
(
args
.
driver
.
filters_amount_whole
<<
32
)
|
args
.
driver
.
filter_address_phy
,
REG_CONV_REG8
);
reg_writeq
((
args
.
driver
.
image_amount_per_row
<<
43
)
|
(
args
.
driver
.
image_hight
<<
32
)
|
args
.
driver
.
image_address_phy
,
REG_CONV_REG9
);
reg_writeq
((
args
.
driver
.
filter_pad_hight
<<
46
)
|
(
args
.
driver
.
image_amount_per_row_multi_win
<<
23
)
|
args
.
driver
.
image_amount_per_row_multi_win_first
,
REG_CONV_REG10
);
reg_writeq
((
args
.
driver
.
image_block_num
<<
48
)
|
(
args
.
driver
.
image_block_len
<<
24
)
|
args
.
driver
.
image_block_len_last
,
REG_CONV_REG11
);
reg_writeq
(
args
.
driver
.
cmd
,
REG_CONV_CMD
);
if
(
0
!=
fpga_regpoll
(
REG_INTERRUPT
,
INTERRUPT_CONV
,
PE_IRQ_TIMEOUT
))
{
g_fpgainfo
.
pe_data
->
pes
[
PE_IDX_CONV
]
->
status
=
ERROR
;
...
...
@@ -307,12 +328,7 @@ int ComputeBasicConv(const struct ConvArgs &args) {
DLOG
<<
"Conv Wait Irq Timeout!"
;
PADDLE_MOBILE_ENFORCE
(
0
,
"Conv Wait Irq Timeout"
);
}
output_scale
=
reg_readq
(
REG_SCALE_PARAMETER
);
output_scale
=
(
output_scale
<<
32
)
|
(
output_scale
>>
32
);
fpga_copy
(
args
.
output
.
scale_address
,
&
output_scale
,
sizeof
(
float
)
*
2
);
active_args
.
activation_type
=
NONE
;
reg_writeq
(
reg_ActivationArgs
,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR
);
DLOG
<<
"after reg poll"
;
pthread_mutex_unlock
(
&
g_fpgainfo
.
pe_data
->
mutex
);
...
...
@@ -350,22 +366,22 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
uint64_t
image_physical_address
=
0
;
uint64_t
output_physical_address
=
0
;
uint64_t
reg_ActivationArgs
=
0
;
//
uint64_t reg_ActivationArgs = 0;
// active function:{none,leakeyrelu,sigmoid,tanh}
ActivationArgs
active_args
;
//
ActivationArgs active_args;
// active_args.activation_type = LEAKYRELU;
active_args
.
activation_type
=
args
.
output
.
activation
.
activation_type
;
//
active_args.activation_type = args.output.activation.activation_type;
active_args
.
leaky_relu_negative_slope
=
args
.
output
.
activation
.
leaky_relu_negative_slope
;
//
active_args.leaky_relu_negative_slope =
//
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs
=
(
uint64_t
(
active_args
.
activation_type
)
<<
32
)
|
active_args
.
leaky_relu_negative_slope
;
//
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
//
active_args.leaky_relu_negative_slope;
DLOG
<<
" activation_type:"
<<
active_args
.
activation_type
<<
" leaky_relu_negative_slope:"
<<
active_args
.
leaky_relu_negative_slope
;
DLOG
<<
" reg_ActivationArgs:"
<<
reg_ActivationArgs
;
//
DLOG << " activation_type:" << active_args.activation_type
//
<< " leaky_relu_negative_slope:"
//
<< active_args.leaky_relu_negative_slope;
//
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
image_physical_address
=
vaddr_to_paddr_driver
(
args
.
image
.
address
);
output_physical_address
=
vaddr_to_paddr_driver
(
args
.
output
.
address
);
...
...
@@ -417,10 +433,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
return
ret
;
}
reg_writeq
(
reg_ActivationArgs
,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR
);
// active functoion
//
reg_writeq(reg_ActivationArgs,
//
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion
reg_writeq
(
output_scale
,
REG_SCALE_PARAMETER
);
//
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq
(
image_physical_address
,
REG_POOLING_IMAGE_BASE_ADDR
);
reg_writeq
(
output_physical_address
,
REG_POOLING_RESULT_BASE_ADDR
);
reg_writeq
(
...
...
@@ -462,12 +478,12 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
DLOG
<<
"after reg poll"
;
// *(args.output.scale_address) = reg_readq(REG_SCALE_PARAMETER);
output_scale
=
reg_readq
(
REG_SCALE_PARAMETER
);
output_scale
=
(
output_scale
<<
32
)
|
(
output_scale
>>
32
);
fpga_copy
(
args
.
output
.
scale_address
,
&
output_scale
,
sizeof
(
float
)
*
2
);
//
output_scale = reg_readq(REG_SCALE_PARAMETER);
//
output_scale = (output_scale << 32) | (output_scale >> 32);
//
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
active_args
.
activation_type
=
NONE
;
reg_writeq
(
reg_ActivationArgs
,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR
);
//
active_args.activation_type = NONE;
//
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock
(
&
g_fpgainfo
.
pe_data
->
mutex
);
...
...
@@ -479,7 +495,7 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
int
ComputeFpgaEWAdd
(
const
struct
EWAddArgs
&
args
)
{
#ifdef FPGA_PRINT_MODE
DLOG
<<
"=============ComputeFpgaEWAdd==========="
;
// DLOG << " relu_enabled:" << args.relu_enabled
DLOG
<<
" relu_enabled:"
<<
args
.
relu_enabled
;
DLOG
<<
" const0:"
<<
fp16_2_fp32
(
int16_t
(
args
.
const0
))
<<
" const1:"
<<
fp16_2_fp32
(
int16_t
(
args
.
const1
));
DLOG
<<
" image0_address:"
<<
args
.
image0
.
address
...
...
@@ -503,17 +519,17 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
int
ret
=
0
;
uint64_t
output_scale
=
0
;
uint64_t
reg_ActivationArgs
=
0
;
ActivationArgs
active_args
;
active_args
.
activation_type
=
args
.
output
.
activation
.
activation_type
;
active_args
.
leaky_relu_negative_slope
=
args
.
output
.
activation
.
leaky_relu_negative_slope
;
reg_ActivationArgs
=
(
uint64_t
(
active_args
.
activation_type
)
<<
32
)
|
active_args
.
leaky_relu_negative_slope
;
DLOG
<<
" activation_type:"
<<
active_args
.
activation_type
<<
" leaky_relu_negative_slope:"
<<
active_args
.
leaky_relu_negative_slope
;
DLOG
<<
" reg_ActivationArgs:"
<<
reg_ActivationArgs
;
//
uint64_t reg_ActivationArgs = 0;
//
ActivationArgs active_args;
//
active_args.activation_type = args.output.activation.activation_type;
//
active_args.leaky_relu_negative_slope =
//
args.output.activation.leaky_relu_negative_slope;
//
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
//
active_args.leaky_relu_negative_slope;
//
DLOG << " activation_type:" << active_args.activation_type
//
<< " leaky_relu_negative_slope:"
//
<< active_args.leaky_relu_negative_slope;
//
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
pthread_mutex_lock
(
&
g_fpgainfo
.
pe_data
->
mutex
);
if
(
ERROR
==
g_fpgainfo
.
pe_data
->
pes
[
PE_IDX_EW
]
->
status
)
{
...
...
@@ -523,8 +539,8 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
return
ret
;
}
reg_writeq
(
reg_ActivationArgs
,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR
);
// active functoion
//
reg_writeq(reg_ActivationArgs,
//
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion
reg_writeq
(
output_scale
,
REG_SCALE_PARAMETER
);
reg_writeq
(
args
.
driver
.
image0_address_phy
,
REG_EW_IMAGE0_BASE_ADDR
);
...
...
@@ -543,11 +559,11 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
PADDLE_MOBILE_ENFORCE
(
0
,
"EW Wait Irq Timeout!"
);
}
output_scale
=
reg_readq
(
REG_SCALE_PARAMETER
);
output_scale
=
(
output_scale
<<
32
)
|
(
output_scale
>>
32
);
fpga_copy
(
args
.
output
.
scale_address
,
&
output_scale
,
sizeof
(
float
)
*
2
);
active_args
.
activation_type
=
NONE
;
reg_writeq
(
reg_ActivationArgs
,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR
);
//
output_scale = reg_readq(REG_SCALE_PARAMETER);
//
output_scale = (output_scale << 32) | (output_scale >> 32);
//
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
//
active_args.activation_type = NONE;
//
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock
(
&
g_fpgainfo
.
pe_data
->
mutex
);
return
ret
;
...
...
src/fpga/common/fpga_common.cpp
浏览文件 @
a1cc931d
...
...
@@ -200,10 +200,10 @@ uint64_t vaddr_to_paddr(void *address) {
}
uint32_t
paddle_mobile_version
()
{
uint32_t
v_master
=
35
;
uint32_t
v_slave
=
35
;
uint32_t
v_master
=
52
;
uint32_t
v_slave
=
52
;
uint32_t
first
=
1
,
second
=
2
,
fourth_master
=
1
,
fourth_slave
=
2
;
uint32_t
first
=
1
,
second
=
2
,
fourth_master
=
1
,
fourth_slave
=
1
;
uint32_t
master
=
first
<<
24
|
second
<<
16
|
v_master
<<
8
|
fourth_master
;
uint32_t
slave
=
first
<<
24
|
second
<<
16
|
v_slave
<<
8
|
fourth_slave
;
...
...
src/fpga/common/fpga_common.h
浏览文件 @
a1cc931d
...
...
@@ -32,8 +32,12 @@ limitations under the License. */
#define FILTER_NUM_ALIGNMENT (32) // Filter number aligned to 32
#define FILTER_ELEMENT_ALIGNMENT (16) // Filter element number aligned to 16
#define BS_NUM_ALIGNMENT (8)
#define BIAS_SCALE_DMA_NUM (4)
#define RESULT_ALIGNMENT (32)
#define PE_COLUMN (8)
#define ROW_PARALLEL_NUM (2)
#define BIAS_NUM_ALIGNMENT (16)
#define ROW_PARALLEL_NUM (3)
#endif
namespace
paddle_mobile
{
...
...
@@ -89,37 +93,59 @@ struct ImageOutputArgs {
};
struct
ConvDriverParam
{
uint64_t
image_address_phy
;
uint64_t
filter_address_phy
;
uint64_t
sb_address_phy
;
uint64_t
output_address_phy
;
uint64_t
output_height
;
uint64_t
output_width
;
uint64_t
filter_per_group
;
uint64_t
channel_per_group
;
uint64_t
image_amount_per_row
;
uint64_t
image_one_pad_per_row
;
uint64_t
filter_amount_all
;
uint64_t
output_amount_per_row
;
uint64_t
deconv_param
;
uint64_t
col_padding_up
;
uint64_t
col_padding_down
;
uint64_t
row_padding_up
;
uint64_t
row_padding_down
;
uint64_t
image_block_amount_per_row
;
uint64_t
filter_pad_width_mul_channel
;
uint64_t
image_amount_per_row_multi_win_first
;
uint64_t
image_amount_per_row_multi_win
;
uint64_t
image_block_num
;
uint64_t
image_block_len
;
uint64_t
image_block_len_last
;
uint64_t
image_win_cnt
;
uint64_t
image_win_cnt_last
;
uint64_t
res_row_data_align4_pad
;
uint64_t
filter_row
;
uint64_t
filter_width
;
uint64_t
filter_height
;
uint64_t
skip_window
;
uint64_t
stride_h
;
uint64_t
filter_amount_all
;
uint64_t
prog_full_cnt
;
uint64_t
filter_align
;
uint64_t
filter_num
;
uint64_t
output_width
;
uint64_t
output_amount_per_row
;
uint64_t
res_row_data_align4_pad
;
uint64_t
cal_res_num
;
uint64_t
last_cal_res_row_num
;
uint64_t
post_prog_full_cnt
;
uint64_t
deconv_skip_row
;
// paralvl*deconv_group
uint64_t
deconv_res_skip_row
;
// deconv_group * result_amount_per_row
uint64_t
deconv_ena
;
uint64_t
deconv_dump
;
uint64_t
output_address_phy
;
uint64_t
output_height
;
uint64_t
result_amount_per_row_multi_para
;
uint64_t
sb_address_phy
;
uint64_t
fpga_bias_scale_len
;
uint64_t
cmd
;
uint64_t
filter_amount_whole
;
uint64_t
filter_address_phy
;
uint64_t
filters_amount_whole
;
uint64_t
image_address_phy
;
uint64_t
image_hight
;
uint64_t
image_amount_per_row
;
uint64_t
image_amount_per_row_multi_win_first
;
uint64_t
image_amount_per_row_multi_win
;
uint64_t
filter_pad_hight
;
uint64_t
image_block_num
;
uint64_t
image_block_len
;
uint64_t
image_block_len_last
;
uint64_t
deconv_param
;
uint64_t
cmd
;
};
struct
EWAddDriverParam
{
...
...
@@ -141,6 +167,7 @@ struct DeconvTxParm {
};
struct
ConvArgs
{
bool
relu_enabled
;
void
*
sb_address
;
// scale and bias
void
*
filter_address
;
float
*
filter_scale_address
;
...
...
@@ -209,6 +236,7 @@ struct PoolingArgs {
};
struct
EWAddArgs
{
bool
relu_enabled
;
uint32_t
const0
;
// output0 = const0 x input0 + const1 x input1;
uint32_t
const1
;
struct
ImageInputArgs
image0
;
...
...
@@ -238,6 +266,7 @@ struct DeconvArgs {
};
struct
DWconvArgs
{
uint32_t
sub_conv_num
;
bool
relu_enabled
;
void
*
bias_address
;
void
*
filter_address
;
struct
KernelArgs
kernel
;
...
...
src/framework/executor.cpp
浏览文件 @
a1cc931d
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include "framework/executor.h"
#include <algorithm>
#include <unordered_map>
#include <utility>
#include <vector>
#include "common/enforce.h"
...
...
@@ -638,7 +639,8 @@ std::map<std::string, float> LoadQuantValFromFile(std::string filename) {
std
::
ifstream
in
;
in
.
open
(
filename
,
std
::
ios
::
in
);
if
(
!
in
.
is_open
())
{
std
::
cout
<<
"open File Failed."
<<
std
::
endl
;
// std::cout << "open File Failed." << std::endl;
DLOG
<<
"open File Failed."
;
exit
(
-
1
);
}
...
...
src/operators/kernel/fpga/V2/conv_add_bn_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -22,6 +22,7 @@ namespace operators {
template
<
>
bool
ConvAddBNKernel
<
FPGA
,
float
>::
Init
(
FusionConvAddBNParam
<
FPGA
>
*
param
)
{
bool
relu_enabled
=
false
;
paddle_mobile
::
fpga
::
ActivationType
activation_enable
=
paddle_mobile
::
fpga
::
NONE
;
int16_t
leaky_relu_negative_slope
=
0
;
...
...
@@ -34,7 +35,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
auto
bn_mean_ptr
=
param
->
InputMean
()
->
data
<
float
>
();
auto
bn_var_ptr
=
param
->
InputVariance
()
->
data
<
float
>
();
...
...
@@ -64,10 +65,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
fpga
::
format_conv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
());
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
param
->
Groups
()
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
relu_enabled
,
param
->
Groups
(),
param
->
Strides
()[
0
]
,
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
conv_arg
);
delete
new_scale
;
...
...
src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -23,9 +23,9 @@ namespace operators {
template
<
>
bool
ConvAddBNReluKernel
<
FPGA
,
float
>::
Init
(
FusionConvAddBNReluParam
<
FPGA
>
*
param
)
{
paddle_mobile
::
fpga
::
ActivationType
activation_enable
=
paddle_mobile
::
fpga
::
LEAKYRELU
;
int16_t
leaky_relu_negative_slope
=
0
;
bool
relu_enabled
=
true
;
// paddle_mobile::fpga::ActivationType activation_enable =
// paddle_mobile::fpga::LEAKYRELU
;
auto
input
=
const_cast
<
LoDTensor
*>
(
param
->
Input
());
auto
bias
=
param
->
Bias
();
auto
bias_ptr
=
bias
->
data
<
float
>
();
...
...
@@ -34,7 +34,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
const
int
groups
=
param
->
Groups
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
vector
<
int
>
paddings
=
param
->
Paddings
();
vector
<
int
>
strides
=
param
->
Strides
();
auto
bn_mean_ptr
=
param
->
InputMean
()
->
data
<
float
>
();
...
...
@@ -70,17 +70,17 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
if
(
groups
==
channel
)
{
fpga
::
format_dwconv_data
(
filter
,
out
,
new_scale_ptr
,
&
new_bias_ptr
);
fpga
::
DWconvArgs
dwconv_arg
=
{
0
};
fpga
::
fill_dwconv_arg
(
&
dwconv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
strides
[
0
],
stride
s
[
1
],
paddings
[
0
],
paddings
[
1
],
new_bias_ptr
);
fpga
::
fill_dwconv_arg
(
&
dwconv_arg
,
input
,
out
,
filter
,
relu_enabled
,
strides
[
0
],
strides
[
1
],
paddings
[
0
],
padding
s
[
1
],
new_bias_ptr
);
param
->
SetFpgaArgs
(
dwconv_arg
);
fpga
::
fpga_free
(
bs_ptr
);
}
else
{
fpga
::
format_conv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
());
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
param
->
Groups
(),
stride
s
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
1
],
bs_ptr
);
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
relu_enabled
,
param
->
Groups
(),
strides
[
0
],
strides
[
1
],
padding
s
[
0
],
paddings
[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
conv_arg
);
}
delete
new_scale
;
...
...
src/operators/kernel/fpga/V2/conv_add_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -31,7 +31,7 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
PADDLE_MOBILE_ENFORCE
(
out
->
dims
()[
1
]
==
bias
->
dims
()[
0
],
"Output channel should be equal to bias number"
);
...
...
@@ -45,8 +45,7 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
fpga
::
format_conv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
());
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
param
->
Groups
(),
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
false
,
param
->
Groups
(),
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
conv_arg
);
...
...
src/operators/kernel/fpga/V2/conv_add_relu_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -31,7 +31,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
PADDLE_MOBILE_ENFORCE
(
out
->
dims
()[
1
]
==
bias
->
dims
()[
0
],
"Output channel should be equal to bias number"
);
...
...
@@ -45,8 +45,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
fpga
::
format_conv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
());
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
param
->
Groups
(),
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
true
,
param
->
Groups
(),
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
conv_arg
);
...
...
src/operators/kernel/fpga/V2/conv_bn_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -30,7 +30,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
auto
bn_mean_ptr
=
param
->
InputMean
()
->
data
<
float
>
();
auto
bn_var_ptr
=
param
->
InputVariance
()
->
data
<
float
>
();
auto
bn_scale_ptr
=
param
->
InputScale
()
->
data
<
float
>
();
...
...
@@ -56,8 +56,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
fpga
::
format_conv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
());
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
param
->
Groups
(),
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
false
,
param
->
Groups
(),
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
conv_arg
);
...
...
src/operators/kernel/fpga/V2/conv_bn_relu_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -29,7 +29,7 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
auto
bn_mean_ptr
=
param
->
InputMean
()
->
data
<
float
>
();
auto
bn_var_ptr
=
param
->
InputVariance
()
->
data
<
float
>
();
auto
bn_scale_ptr
=
param
->
InputScale
()
->
data
<
float
>
();
...
...
@@ -58,17 +58,16 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
if
(
groups
==
channel
)
{
fpga
::
format_dwconv_data
(
filter
,
out
,
new_scale_ptr
,
&
new_bias_ptr
);
fpga
::
DWconvArgs
dwconv_arg
=
{
0
};
fpga
::
fill_dwconv_arg
(
&
dwconv_arg
,
input
,
out
,
filter
,
activation_enabl
e
,
leaky_relu_negative_slope
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
new_bias_ptr
);
fpga
::
fill_dwconv_arg
(
&
dwconv_arg
,
input
,
out
,
filter
,
tru
e
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
new_bias_ptr
);
param
->
SetFpgaArgs
(
dwconv_arg
);
fpga
::
fpga_free
(
bs_ptr
);
}
else
{
fpga
::
format_conv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
());
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
param
->
Groups
(),
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
true
,
param
->
Groups
(),
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
conv_arg
);
...
...
src/operators/kernel/fpga/V2/conv_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -29,7 +29,7 @@ bool ConvKernel<FPGA, float>::Init(ConvParam<FPGA> *param) {
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
int
channel
=
out
->
dims
()[
1
];
auto
bs_ptr
=
(
float
*
)
fpga
::
fpga_malloc
(
2
*
channel
*
sizeof
(
float
));
// NOLINT
...
...
@@ -40,8 +40,7 @@ bool ConvKernel<FPGA, float>::Init(ConvParam<FPGA> *param) {
fpga
::
format_conv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
());
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
param
->
Groups
(),
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
false
,
param
->
Groups
(),
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
conv_arg
);
...
...
src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -31,7 +31,7 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
int
channel
=
out
->
dims
()[
1
];
...
...
@@ -58,8 +58,7 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
fpga
::
format_DWDeconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DWDeconvArgs
DWDeconv_arg
=
{
0
};
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
false
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
DWDeconv_arg
);
...
...
@@ -70,10 +69,10 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
}
fpga
::
format_deconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DeconvArgs
deconv_arg
=
{
0
};
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
activation_enabl
e
,
leaky_relu_negative_slope
,
param
->
Groups
()
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
fals
e
,
param
->
Groups
(),
param
->
Strides
()[
0
]
,
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
deconv_arg
);
}
return
true
;
...
...
src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -33,7 +33,7 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
PADDLE_MOBILE_ENFORCE
(
out
->
dims
()[
1
]
==
bias
->
dims
()[
0
],
"Output channel should be equal to bias number"
);
int
channel
=
out
->
dims
()[
1
];
...
...
@@ -61,8 +61,7 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
fpga
::
format_DWDeconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DWDeconvArgs
DWDeconv_arg
=
{
0
};
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
false
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
DWDeconv_arg
);
...
...
@@ -73,10 +72,10 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
}
fpga
::
format_deconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DeconvArgs
deconv_arg
=
{
0
};
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
activation_enabl
e
,
leaky_relu_negative_slope
,
param
->
Groups
()
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
fals
e
,
param
->
Groups
(),
param
->
Strides
()[
0
]
,
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
deconv_arg
);
}
return
true
;
...
...
src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -34,7 +34,7 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
PADDLE_MOBILE_ENFORCE
(
out
->
dims
()[
1
]
==
bias
->
dims
()[
0
],
"Output channel should be equal to bias number"
);
int
channel
=
out
->
dims
()[
1
];
...
...
@@ -62,8 +62,7 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
fpga
::
format_DWDeconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DWDeconvArgs
DWDeconv_arg
=
{
0
};
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
true
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
DWDeconv_arg
);
...
...
@@ -74,10 +73,10 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
}
fpga
::
format_deconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DeconvArgs
deconv_arg
=
{
0
};
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
activation_enabl
e
,
leaky_relu_negative_slope
,
param
->
Groups
()
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
tru
e
,
param
->
Groups
(),
param
->
Strides
()[
0
]
,
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
deconv_arg
);
}
return
true
;
...
...
src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -33,7 +33,7 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
PADDLE_MOBILE_ENFORCE
(
out
->
dims
()[
1
]
==
bias
->
dims
()[
0
],
"Output channel should be equal to bias number"
);
int
channel
=
out
->
dims
()[
1
];
...
...
@@ -61,8 +61,7 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
fpga
::
format_DWDeconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DWDeconvArgs
DWDeconv_arg
=
{
0
};
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
false
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
DWDeconv_arg
);
...
...
@@ -73,10 +72,10 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
}
fpga
::
format_deconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DeconvArgs
deconv_arg
=
{
0
};
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
activation_enabl
e
,
leaky_relu_negative_slope
,
param
->
Groups
()
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
fals
e
,
param
->
Groups
(),
param
->
Strides
()[
0
]
,
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
deconv_arg
);
}
...
...
src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -34,7 +34,7 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
PADDLE_MOBILE_ENFORCE
(
out
->
dims
()[
1
]
==
bias
->
dims
()[
0
],
"Output channel should be equal to bias number"
);
int
channel
=
out
->
dims
()[
1
];
...
...
@@ -57,8 +57,7 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
fpga
::
format_DWDeconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DWDeconvArgs
DWDeconv_arg
=
{
0
};
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
true
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
DWDeconv_arg
);
...
...
@@ -69,10 +68,10 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
}
fpga
::
format_deconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DeconvArgs
deconv_arg
=
{
0
};
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
activation_enabl
e
,
leaky_relu_negative_slope
,
param
->
Groups
()
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
tru
e
,
param
->
Groups
(),
param
->
Strides
()[
0
]
,
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
deconv_arg
);
}
return
true
;
...
...
src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -35,7 +35,7 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
auto
bn_mean_ptr
=
param
->
InputMean
()
->
data
<
float
>
();
auto
bn_var_ptr
=
param
->
InputVariance
()
->
data
<
float
>
();
auto
bn_scale_ptr
=
param
->
InputScale
()
->
data
<
float
>
();
...
...
@@ -80,18 +80,17 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
fpga
::
format_DWDeconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DWDeconvArgs
DWDeconv_arg
=
{
0
};
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
fpga
::
fill_DWDeconv_arg
(
&
DWDeconv_arg
,
input
,
out
,
filter
,
true
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
DWDeconv_arg
);
}
else
{
fpga
::
format_deconv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
(),
sub_conv_n
);
fpga
::
DeconvArgs
deconv_arg
=
{
0
};
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
activation_enabl
e
,
leaky_relu_negative_slope
,
param
->
Groups
()
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
fpga
::
fill_deconv_arg
(
&
deconv_arg
,
input
,
out
,
filter
,
tru
e
,
param
->
Groups
(),
param
->
Strides
()[
0
]
,
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
deconv_arg
);
}
delete
new_scale
;
...
...
src/operators/kernel/fpga/V2/feed_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -44,7 +44,6 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> ¶m) {
}
fpga
::
format_image
(
input
);
output
->
ShareDataWith
(
*
input
);
input
->
external_data
=
nullptr
;
}
template
class
FeedKernel
<
FPGA
,
float
>;
...
...
src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -20,6 +20,7 @@ namespace operators {
template
<
>
bool
FusionFcKernel
<
FPGA
,
float
>::
Init
(
FusionFcParam
<
FPGA
>
*
param
)
{
bool
relu_enabled
=
false
;
paddle_mobile
::
fpga
::
ActivationType
activation_enable
=
paddle_mobile
::
fpga
::
NONE
;
int16_t
leaky_relu_negative_slope
=
0
;
...
...
@@ -58,8 +59,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
fpga
::
format_ofm
(
out
);
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
fill_split_arg
(
&
conv_arg
,
input_x
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
1
,
1
,
1
,
0
,
0
,
bs_ptr
);
fpga
::
fill_split_arg
(
&
conv_arg
,
input_x
,
out
,
filter
,
relu_enabled
,
1
,
1
,
1
,
0
,
0
,
bs_ptr
);
param
->
SetFpgaArgs
(
conv_arg
);
return
true
;
}
...
...
src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp
浏览文件 @
a1cc931d
...
...
@@ -20,6 +20,7 @@ namespace operators {
template
<
>
bool
FusionFcReluKernel
<
FPGA
,
float
>::
Init
(
FusionFcReluParam
<
FPGA
>
*
param
)
{
bool
relu_enabled
=
false
;
paddle_mobile
::
fpga
::
ActivationType
activation_enable
=
paddle_mobile
::
fpga
::
LEAKYRELU
;
int16_t
leaky_relu_negative_slope
=
0
;
...
...
@@ -58,8 +59,8 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
fpga
::
format_ofm
(
out
);
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
fill_split_arg
(
&
conv_arg
,
input_x
,
out
,
filter
,
activation_enable
,
leaky_relu_negative_slope
,
1
,
1
,
1
,
0
,
0
,
bs_ptr
);
fpga
::
fill_split_arg
(
&
conv_arg
,
input_x
,
out
,
filter
,
relu_enabled
,
1
,
1
,
1
,
0
,
0
,
bs_ptr
);
param
->
SetFpgaArgs
(
conv_arg
);
return
true
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录