Paddle-Lite (PaddlePaddle) - Commit a1cc931d

V2-conv-hellocase pass & V1 verify-pass (#1608)

Authored on May 08, 2019 by jameswu2014; committed by qnqinan on May 08, 2019.
Parent: 64aa8f05

Showing 22 changed files with 355 additions and 287 deletions (+355 -287).
Changed files:

src/fpga/V2/api.cpp                                          +94   -56
src/fpga/V2/api.h                                             +8   -16
src/fpga/V2/pe.cpp                                          +127  -111
src/fpga/common/fpga_common.cpp                               +3    -3
src/fpga/common/fpga_common.h                                +48   -19
src/framework/executor.cpp                                    +3    -1
src/operators/kernel/fpga/V2/conv_add_bn_kernel.cpp           +6    -5
src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp     +10   -10
src/operators/kernel/fpga/V2/conv_add_kernel.cpp              +2    -3
src/operators/kernel/fpga/V2/conv_add_relu_kernel.cpp         +2    -3
src/operators/kernel/fpga/V2/conv_bn_kernel.cpp               +2    -3
src/operators/kernel/fpga/V2/conv_bn_relu_kernel.cpp          +6    -7
src/operators/kernel/fpga/V2/conv_kernel.cpp                  +2    -3
src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp        +6    -7
src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp         +6    -7
src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp    +6    -7
src/operators/kernel/fpga/V2/deconv_add_kernel.cpp            +6    -7
src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp       +6    -7
src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp        +6    -7
src/operators/kernel/fpga/V2/feed_kernel.cpp                  +0    -1
src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp             +3    -2
src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp        +3    -2
src/fpga/V2/api.cpp

@@ -22,6 +22,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace fpga {
 
+#define USE_RELU 1
 #define USE_BIAS 2
 
 void format_image(framework::Tensor *image_tensor) {
@@ -301,7 +302,9 @@ void expand_conv_arg(ConvArgs *arg) {
   ConvArgs args = *arg;
   auto fpga_bias_scale_len =
-      align_to_x(args.filter_num / args.group_num, 8) * args.group_num;
+      align_to_x(args.filter_num / args.group_num, BS_NUM_ALIGNMENT) *
+      args.group_num;
+  fpga_bias_scale_len = fpga_bias_scale_len / BIAS_SCALE_DMA_NUM;
   auto output_height =
       (args.image.height + args.image.pad_height * 2 - args.kernel.height) /
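As a side note on the arithmetic above: the bias/scale length is the per-group filter count rounded up to BS_NUM_ALIGNMENT and then divided across BIAS_SCALE_DMA_NUM DMA words. A minimal, self-contained sketch of that computation, assuming align_to_x(n, a) rounds n up to the next multiple of a (the helper's usual behaviour in this repo; the sketch is illustrative, not the library code):

  #include <cstdint>

  // Hypothetical stand-in for the repo's align_to_x helper: round n up to a multiple of align.
  static uint64_t align_to_x(uint64_t n, uint64_t align) {
    return (n + align - 1) / align * align;
  }

  int main() {
    const uint64_t filter_num = 50, group_num = 1;
    const uint64_t BS_NUM_ALIGNMENT = 8, BIAS_SCALE_DMA_NUM = 4;
    uint64_t len = align_to_x(filter_num / group_num, BS_NUM_ALIGNMENT) * group_num;  // 56
    len = len / BIAS_SCALE_DMA_NUM;                                                   // 14 words per DMA channel
    return len == 14 ? 0 : 1;
  }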
@@ -325,7 +328,7 @@ void expand_conv_arg(ConvArgs *arg) {
   auto output_amount_per_row = align_to_x(
       (output_width - (args.deconv_tx_param.omit_size) * 2) * args.filter_num,
-      IMAGE_ALIGNMENT);
+      RESULT_ALIGNMENT);
   // find the opt partition strategy
   uint64_t res_win;

@@ -335,10 +338,10 @@ void expand_conv_arg(ConvArgs *arg) {
         (align_to_x((args.image.channels *
                      (args.kernel.width + (res_win - 1) * args.kernel.stride_w)),
                     IMAGE_ALIGNMENT) /
-             16 +
+             IMAGE_ALIGNMENT +
          1) *
             args.kernel.height >
-        2048) {
+        256) {
       break;
     }
   }
@@ -350,6 +353,7 @@ void expand_conv_arg(ConvArgs *arg) {
   if (((res_win % 2) != 0) && (res_win != 1)) {
     res_win = res_win - 1;
   }
+  PADDLE_MOBILE_ENFORCE(res_win >= 2, "window too bigger than fpga volume");
   res_fit = res_win;
   auto block_num = (output_width + res_fit - 1) / res_fit;

@@ -375,14 +379,14 @@ void expand_conv_arg(ConvArgs *arg) {
       align_to_x((args.image.channels *
                   (args.kernel.width + (block_len - 1) * args.kernel.stride_w)),
                  IMAGE_ALIGNMENT) /
-          16 +
+          IMAGE_ALIGNMENT +
       1;
   auto image_block_len_last =
       align_to_x(
           (args.image.channels *
            (args.kernel.width + (block_last - 1) * args.kernel.stride_w)),
           IMAGE_ALIGNMENT) /
-          16 +
+          IMAGE_ALIGNMENT +
       1;
   auto image_win_cnt = block_len;
   auto image_win_cnt_last = block_last;
@@ -395,46 +399,85 @@ void expand_conv_arg(ConvArgs *arg) {
       (512 / (align_to_x(args.filter_num, 4) / 4 * 2) > 2)
           ? (512 / (align_to_x(args.filter_num, 4) / 4 * 2) - 2)
           : 0;
-  //  auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
-  auto cmd = 0UL | USE_BIAS;
+  auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
+  //  auto cmd = 0UL | USE_BIAS;
   auto deconv_param = ((args.deconv_tx_param.deconv_en) << 16) |
                       ((args.deconv_tx_param.sub_conv_num) << 8) |
                       ((args.deconv_tx_param.omit_size) << 0);
-  (*arg).driver.image_address_phy = vaddr_to_paddr(args.image.address);
-  (*arg).driver.sb_address_phy = vaddr_to_paddr(args.sb_address);
-  (*arg).driver.filter_address_phy = vaddr_to_paddr(args.filter_address);
-  (*arg).driver.output_address_phy = vaddr_to_paddr(args.output.address) +
-                                     args.deconv_tx_param.out_addr_offset;
-  (*arg).driver.output_height = output_height;
-  (*arg).driver.output_width = output_width;
   (*arg).driver.filter_per_group = filter_per_group;
   (*arg).driver.channel_per_group = channel_per_group;
-  (*arg).driver.image_amount_per_row = image_amount_per_row;
   (*arg).driver.image_one_pad_per_row = image_one_pad_per_row;
-  (*arg).driver.filter_amount_all = filter_amount_all;
-  (*arg).driver.deconv_param = deconv_param;
-  (*arg).driver.output_amount_per_row = output_amount_per_row;
+  // new
+  (*arg).driver.col_padding_up = args.image.pad_width * args.image.channels;
+  (*arg).driver.col_padding_down = image_one_pad_per_row;
+  (*arg).driver.row_padding_up = args.image.pad_height;
+  (*arg).driver.row_padding_down = args.image.pad_height + args.image.height;
   (*arg).driver.image_block_amount_per_row = image_block_amount_per_row;
   (*arg).driver.filter_pad_width_mul_channel = filter_pad_width_mul_channel;
+  (*arg).driver.image_win_cnt = image_win_cnt;
+  (*arg).driver.image_win_cnt_last = image_win_cnt_last;
+  (*arg).driver.filter_row = args.kernel.width * args.image.channels;
+  (*arg).driver.filter_width = args.kernel.width;
+  (*arg).driver.filter_height = args.kernel.height;
+  (*arg).driver.skip_window = args.image.channels * args.kernel.stride_w;
+  (*arg).driver.stride_h = args.kernel.stride_h;
+  (*arg).driver.filter_amount_all = filter_amount_all;
+  (*arg).driver.prog_full_cnt = prog_full_cnt;
+  (*arg).driver.filter_align =
+      args.filter_num / (4 * PE_COLUMN) +
+      (((args.filter_num % (4 * PE_COLUMN))) ? 1 : 0);
+  (*arg).driver.filter_num = args.filter_num;
+  (*arg).driver.output_width = output_width;
+  (*arg).driver.output_amount_per_row = output_amount_per_row;
+  (*arg).driver.res_row_data_align4_pad = res_row_data_align4_pad;
+  (*arg).driver.cal_res_num =
+      output_height / ROW_PARALLEL_NUM +
+      ((output_height % ROW_PARALLEL_NUM) ? 1 : 0) - 1;
+  (*arg).driver.last_cal_res_row_num = (output_height % (ROW_PARALLEL_NUM))
+                                           ? (output_height % (ROW_PARALLEL_NUM))
+                                           : (ROW_PARALLEL_NUM);
+  (*arg).driver.post_prog_full_cnt = post_prog_full_cnt;
+  (*arg).driver.deconv_skip_row =
+      ROW_PARALLEL_NUM * args.deconv_tx_param.sub_conv_num;  // paralvl*deconv_group
+  (*arg).driver.deconv_res_skip_row =
+      args.deconv_tx_param.sub_conv_num *
+      output_amount_per_row;  // deconv_group * result_amount_per_row
+  (*arg).driver.deconv_ena = args.deconv_tx_param.deconv_en;
+  (*arg).driver.deconv_dump = args.deconv_tx_param.omit_size;
+  (*arg).driver.output_address_phy = vaddr_to_paddr(args.output.address) +
+                                     args.deconv_tx_param.out_addr_offset;
+  (*arg).driver.output_height = output_height;
+  (*arg).driver.result_amount_per_row_multi_para =
+      output_amount_per_row / RESULT_ALIGNMENT *
+      (args.deconv_tx_param.deconv_en ? (*arg).driver.deconv_skip_row
+                                      : ROW_PARALLEL_NUM);
+  (*arg).driver.sb_address_phy = vaddr_to_paddr(args.sb_address);
+  (*arg).driver.fpga_bias_scale_len = fpga_bias_scale_len;
+  (*arg).driver.filter_amount_whole = filter_amount_all;
+  (*arg).driver.filter_address_phy = vaddr_to_paddr(args.filter_address);
+  (*arg).driver.filters_amount_whole =
+      filter_amount_all * (*arg).driver.filter_align * (4 * PE_COLUMN);
+  (*arg).driver.image_address_phy = vaddr_to_paddr(args.image.address);
+  (*arg).driver.image_hight = args.image.height;
+  (*arg).driver.image_amount_per_row = image_amount_per_row;
   (*arg).driver.image_amount_per_row_multi_win_first =
       image_amount_per_row_multi_win_first;
   (*arg).driver.image_amount_per_row_multi_win = image_amount_per_row_multi_win;
+  (*arg).driver.filter_pad_hight = args.image.pad_height;
   (*arg).driver.image_block_num = image_block_num;
   (*arg).driver.image_block_len = image_block_len;
   (*arg).driver.image_block_len_last = image_block_len_last;
-  (*arg).driver.image_win_cnt = image_win_cnt;
-  (*arg).driver.image_win_cnt_last = image_win_cnt_last;
-  (*arg).driver.res_row_data_align4_pad = res_row_data_align4_pad;
-  (*arg).driver.prog_full_cnt = prog_full_cnt;
-  (*arg).driver.post_prog_full_cnt = post_prog_full_cnt;
-  (*arg).driver.fpga_bias_scale_len = fpga_bias_scale_len;
   (*arg).driver.cmd = cmd;
+  (*arg).driver.deconv_param = deconv_param;
 }  // expand_conv_arg()
 
 void expand_EW_arg(EWAddArgs *arg) {
   EWAddArgs args = *arg;
-  uint64_t cmd = 0;
+  uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
   uint64_t datalen = (uint64_t)args.image0.width *
                      (uint64_t)args.image0.height *
                      (uint64_t)args.image0.channels;
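The cal_res_num and last_cal_res_row_num fields introduced above amount to a ceiling division of the output height into groups of ROW_PARALLEL_NUM rows, minus one, plus the size of the final (possibly partial) group. A small standalone check of that arithmetic with arbitrary example heights (illustrative only):

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint64_t ROW_PARALLEL_NUM = 2;
    for (uint64_t output_height = 1; output_height <= 8; ++output_height) {
      uint64_t cal_res_num = output_height / ROW_PARALLEL_NUM +
                             ((output_height % ROW_PARALLEL_NUM) ? 1 : 0) - 1;
      uint64_t last_rows = (output_height % ROW_PARALLEL_NUM)
                               ? (output_height % ROW_PARALLEL_NUM)
                               : ROW_PARALLEL_NUM;
      // cal_res_num groups of ROW_PARALLEL_NUM rows plus one final group of
      // last_rows rows must cover the output exactly.
      assert(cal_res_num * ROW_PARALLEL_NUM + last_rows == output_height);
    }
    return 0;
  }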
@@ -462,10 +505,8 @@ void expand_EW_arg(EWAddArgs *arg) {
 void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
                     framework::Tensor *out, framework::Tensor *filter,
-                    ActivationType activation_enable,
-                    int16_t leaky_relu_negative_slope, int group_num,
-                    int stride_h, int stride_w, int padding_h, int padding_w,
-                    float *bs_ptr) {
+                    bool relu_enabled, int group_num, int stride_h,
+                    int stride_w, int padding_h, int padding_w,
+                    float *bs_ptr) {
   auto input_ptr = input->data<int8_t>();
   auto filter_ptr = filter->data<int8_t>();
   auto out_ptr = out->data<int8_t>();

@@ -473,6 +514,7 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
   arg->group_num = (uint32_t)group_num;
   // Either group_num or split_num = 1;
+  PADDLE_MOBILE_ENFORCE(group_num == 1, "group_num is not equal to 1");
   arg->split_num = group_num == 1 ? (uint32_t)get_plit_num(filter) : 1;
   arg->filter_num = (uint32_t)filter->dims()[0];
   arg->output.address = out_ptr;

@@ -511,9 +553,7 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
                                  filter->dims()[3]));
 
   for (int i = 0; i < n; i++) {
-    arg->conv_arg[i].output.activation.activation_type = activation_enable;
-    arg->conv_arg[i].output.activation.leaky_relu_negative_slope =
-        leaky_relu_negative_slope;
+    arg->conv_arg[i].relu_enabled = relu_enabled;
     arg->conv_arg[i].group_num = (uint32_t)group_num;
     arg->conv_arg[i].kernel.stride_h = (uint32_t)stride_h;
     arg->conv_arg[i].kernel.stride_w = (uint32_t)stride_w;
@@ -585,9 +625,8 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
...
@@ -585,9 +625,8 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
void
fill_deconv_arg
(
struct
DeconvArgs
*
arg
,
framework
::
Tensor
*
input
,
void
fill_deconv_arg
(
struct
DeconvArgs
*
arg
,
framework
::
Tensor
*
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
ActivationType
activation_enable
,
bool
relu_enabled
,
int
group_num
,
int
stride_h
,
int16_t
leaky_relu_negative_slope
,
int
group_num
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
int
stride_h
,
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bs_ptr
)
{
float
*
bs_ptr
)
{
auto
input_ptr
=
input
->
data
<
int8_t
>
();
auto
input_ptr
=
input
->
data
<
int8_t
>
();
auto
filter_ptr
=
filter
->
data
<
int8_t
>
();
auto
filter_ptr
=
filter
->
data
<
int8_t
>
();
...
@@ -713,12 +752,14 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
...
@@ -713,12 +752,14 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
}
}
for
(
int
j
=
0
;
j
<
split_num
;
++
j
)
{
for
(
int
j
=
0
;
j
<
split_num
;
++
j
)
{
arg
->
split_conv_args
[
i
]
->
conv_arg
[
j
].
output
.
activation
.
activation_type
=
// arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type
activation_enable
;
// =
arg
->
split_conv_args
[
i
]
// activation_enable;
->
conv_arg
[
j
]
// arg->split_conv_args[i]
.
output
.
activation
.
leaky_relu_negative_slope
=
// ->conv_arg[j]
leaky_relu_negative_slope
;
// .output.activation.leaky_relu_negative_slope =
// leaky_relu_negative_slope;
arg
->
split_conv_args
[
i
]
->
conv_arg
[
j
].
relu_enabled
=
relu_enabled
;
arg
->
split_conv_args
[
i
]
->
conv_arg
[
j
].
group_num
=
(
uint32_t
)
group_num
;
arg
->
split_conv_args
[
i
]
->
conv_arg
[
j
].
group_num
=
(
uint32_t
)
group_num
;
arg
->
split_conv_args
[
i
]
->
conv_arg
[
j
].
kernel
.
width
=
arg
->
split_conv_args
[
i
]
->
conv_arg
[
j
].
kernel
.
width
=
...
@@ -831,16 +872,14 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
...
@@ -831,16 +872,14 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
void
fill_dwconv_arg
(
struct
DWconvArgs
*
arg
,
framework
::
Tensor
*
input
,
void
fill_dwconv_arg
(
struct
DWconvArgs
*
arg
,
framework
::
Tensor
*
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
ActivationType
activation_enable
,
bool
relu_enabled
,
int
stride_h
,
int
stride_w
,
int16_t
leaky_relu_negative_slope
,
int
stride_h
,
int
padding_h
,
int
padding_w
,
float
*
bias_ptr
)
{
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bias_ptr
)
{
auto
filter_ptr
=
filter
->
data
<
int16_t
>
();
auto
filter_ptr
=
filter
->
data
<
int16_t
>
();
auto
input_ptr
=
input
->
data
<
int8_t
>
();
auto
input_ptr
=
input
->
data
<
int8_t
>
();
auto
output_ptr
=
out
->
mutable_data
<
int8_t
>
();
auto
output_ptr
=
out
->
mutable_data
<
int8_t
>
();
arg
->
sub_conv_num
=
1
;
arg
->
sub_conv_num
=
1
;
arg
->
output
.
activation
.
activation_type
=
activation_enable
;
arg
->
relu_enabled
=
relu_enabled
;
arg
->
output
.
activation
.
leaky_relu_negative_slope
=
leaky_relu_negative_slop
e
;
// arg->output.activation.activation_type = activation_enabl
e;
arg
->
bias_address
=
bias_ptr
;
arg
->
bias_address
=
bias_ptr
;
arg
->
filter_address
=
filter_ptr
;
arg
->
filter_address
=
filter_ptr
;
arg
->
kernel
.
height
=
(
uint32_t
)
filter
->
dims
()[
2
];
arg
->
kernel
.
height
=
(
uint32_t
)
filter
->
dims
()[
2
];
...
@@ -860,10 +899,8 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
...
@@ -860,10 +899,8 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
void
fill_DWDeconv_arg
(
struct
DWDeconvArgs
*
arg
,
framework
::
Tensor
*
input
,
void
fill_DWDeconv_arg
(
struct
DWDeconvArgs
*
arg
,
framework
::
Tensor
*
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
filter
,
ActivationType
activation_enable
,
bool
relu_enabled
,
int
stride_h
,
int
stride_w
,
int16_t
leaky_relu_negative_slope
,
int
stride_h
,
int
padding_h
,
int
padding_w
,
float
*
bias_ptr
)
{
int
stride_w
,
int
padding_h
,
int
padding_w
,
float
*
bias_ptr
)
{
auto
filter_ptr
=
filter
->
data
<
int8_t
>
();
auto
filter_ptr
=
filter
->
data
<
int8_t
>
();
auto
input_ptr
=
input
->
data
<
int8_t
>
();
auto
input_ptr
=
input
->
data
<
int8_t
>
();
...
@@ -913,10 +950,11 @@ void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
...
@@ -913,10 +950,11 @@ void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
arg
->
dw_conv_args
.
push_back
(
std
::
make_shared
<
DWconvArgs
>
());
arg
->
dw_conv_args
.
push_back
(
std
::
make_shared
<
DWconvArgs
>
());
arg
->
dw_conv_args
[
i
]
->
sub_conv_num
=
sub_conv_num
;
arg
->
dw_conv_args
[
i
]
->
sub_conv_num
=
sub_conv_num
;
// arg->dw_conv_args[i]->relu_enabled = relu_enabled;
arg
->
dw_conv_args
[
i
]
->
relu_enabled
=
relu_enabled
;
arg
->
dw_conv_args
[
i
]
->
output
.
activation
.
activation_type
=
activation_enable
;
// arg->dw_conv_args[i]->output.activation.activation_type =
arg
->
dw_conv_args
[
i
]
->
output
.
activation
.
leaky_relu_negative_slope
=
// activation_enable;
leaky_relu_negative_slope
;
// arg->dw_conv_args[i]->output.activation.leaky_relu_negative_slope =
// leaky_relu_negative_slope;
arg
->
dw_conv_args
[
i
]
->
bias_address
=
bias_ptr
;
arg
->
dw_conv_args
[
i
]
->
bias_address
=
bias_ptr
;
arg
->
dw_conv_args
[
i
]
->
filter_address
=
arg
->
dw_conv_args
[
i
]
->
filter_address
=
...
...
src/fpga/V2/api.h

@@ -48,28 +48,20 @@ void format_concat_output(framework::Tensor* out, int height, int width,
 void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
                     framework::Tensor *out, framework::Tensor *filter,
-                    ActivationType activation_enable,
-                    int16_t leaky_relu_negative_slope, int group_num,
-                    int stride_h, int stride_w, int padding_h, int padding_w,
-                    float *bs_ptr);
+                    bool relu_enabled, int group_num, int stride_h,
+                    int stride_w, int padding_h, int padding_w, float *bs_ptr);
 void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     ActivationType activation_enable,
-                     int16_t leaky_relu_negative_slope, int group_num,
-                     int stride_h, int stride_w, int padding_h, int padding_w,
-                     float *bs_ptr);
+                     bool relu_enabled, int group_num, int stride_h,
+                     int stride_w, int padding_h, int padding_w, float *bs_ptr);
 void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     ActivationType activation_enable,
-                     int16_t leaky_relu_negative_slope, int stride_h,
-                     int stride_w, int padding_h, int padding_w,
-                     float *bias_ptr);
+                     bool relu_enabled, int stride_h, int stride_w,
+                     int padding_h, int padding_w, float *bias_ptr);
 void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
                        framework::Tensor *out, framework::Tensor *filter,
-                       ActivationType activation_enable,
-                       int16_t leaky_relu_negative_slope, int stride_h,
-                       int stride_w, int padding_h, int padding_w,
-                       float *bs_ptr);
+                       bool relu_enabled, int stride_h, int stride_w,
+                       int padding_h, int padding_w, float *bs_ptr);
 void format_deconv_filter(framework::Tensor *filter_tensor, float max_value,
                           int group_num, int stride);
src/fpga/V2/pe.cpp

@@ -115,6 +115,19 @@ using namespace std;  // NOLINT
 /*conv*/
 #define REG_CONV_CMD 0xC00
+#define REG_CONV_REG0 0xC08
+#define REG_CONV_REG1 0xC10
+#define REG_CONV_REG2 0xC18
+#define REG_CONV_REG3 0xC20
+#define REG_CONV_REG4 0xC28
+#define REG_CONV_REG5 0xC30
+#define REG_CONV_REG6 0xC38
+#define REG_CONV_REG7 0xC40
+#define REG_CONV_REG8 0xC48
+#define REG_CONV_REG9 0xC50
+#define REG_CONV_REG10 0xC58
+#define REG_CONV_REG11 0xC60
+
 #define REG_CONV_IMAGE_BASE_ADDR 0xC08
 #define REG_CONV_FILTER_BASE_ADDR 0xC10
 #define REG_CONV_SB_BASE_ADDR 0xC18
@@ -194,7 +207,7 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) {
 int ComputeBasicConv(const struct ConvArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "======Compute Basic Conv======";
-  //  DLOG << " relu_enabled:" << args.relu_enabled
+  DLOG << " relu_enabled:" << args.relu_enabled;
   DLOG << " sb_address:" << args.sb_address
        << " filter_address:" << args.filter_address
        << " filter_num:" << args.filter_num

@@ -218,23 +231,23 @@ int ComputeBasicConv(const struct ConvArgs &args) {
   int ret = 0;
   uint64_t output_scale = 0;
-  uint64_t reg_ActivationArgs = 0;
+  //  uint64_t reg_ActivationArgs = 0;
   // active function:{none,leakeyrelu,sigmoid,tanh}
-  ActivationArgs active_args;
+  //  ActivationArgs active_args;
   // active_args.activation_type = LEAKYRELU;
-  active_args.activation_type = args.output.activation.activation_type;
-  active_args.leaky_relu_negative_slope =
-      args.output.activation.leaky_relu_negative_slope;
-  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
-                       active_args.leaky_relu_negative_slope;
-  DLOG << " activation_type:" << active_args.activation_type
-       << " leaky_relu_negative_slope:"
-       << active_args.leaky_relu_negative_slope;
-  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
+  //  active_args.activation_type = args.output.activation.activation_type;
+  //  active_args.leaky_relu_negative_slope =
+  //      args.output.activation.leaky_relu_negative_slope;
+  //  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+  //                       active_args.leaky_relu_negative_slope;
+  //  DLOG << " activation_type:" << active_args.activation_type
+  //       << " leaky_relu_negative_slope:"
+  //       << active_args.leaky_relu_negative_slope;
+  //  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
 
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) {
@@ -243,63 +256,71 @@ int ComputeBasicConv(const struct ConvArgs &args) {
     pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
     return ret;
   }
-  reg_writeq(reg_ActivationArgs,
-             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
-  reg_writeq(output_scale, REG_SCALE_PARAMETER);
-  reg_writeq(
-      ((uint64_t)args.image.height) | (((uint64_t)args.image.width) << 32),
-      REG_CONV_IMAGE_PIXEL);
-  reg_writeq(
-      ((uint64_t)args.kernel.height) | (((uint64_t)args.kernel.width) << 32),
-      REG_CONV_FILTER_PIXEL);
-  uint64_t output_height_fraction =
-      args.driver.output_height / ROW_PARALLEL_NUM;
-  uint64_t output_height_remainder =
-      args.driver.output_height % ROW_PARALLEL_NUM;
-  reg_writeq(args.driver.output_height | (output_height_fraction << 16) |
-                 (output_height_remainder << 26) |
-                 (args.driver.output_width << 32),
-             REG_CONV_RESULT_PIXEL);
-  reg_writeq(((uint64_t)args.image.pad_height) |
-                 (((uint64_t)args.image.pad_width) << 32),
-             REG_CONV_PAD_PIXEL);
-  reg_writeq(((uint64_t)args.kernel.stride_h) |
-                 (((uint64_t)args.kernel.stride_w) << 32),
-             REG_CONV_STEP_PIXEL);
-  reg_writeq((uint64_t)args.group_num, REG_CONV_GROUP_NUMBER);
-  reg_writeq((uint64_t)args.filter_num, REG_CONV_FILTER_NUMBER);
-  reg_writeq((uint64_t)args.image.channels, REG_CONV_CHANNEL_NUMBER);
-  reg_writeq(*(uint64_t *)args.image.scale_address,  // NOLINT
-             REG_CONV_IMAGE_SCALE);
-  reg_writeq(*(uint64_t *)args.filter_scale_address,  // NOLINT
-             REG_CONV_FILTER_SCALE);
-  reg_writeq(args.driver.image_address_phy, REG_CONV_IMAGE_BASE_ADDR);
-  reg_writeq(args.driver.filter_address_phy, REG_CONV_FILTER_BASE_ADDR);
-  reg_writeq(args.driver.sb_address_phy, REG_CONV_SB_BASE_ADDR);
-  reg_writeq(args.driver.output_address_phy, REG_CONV_RESULT_BASE_ADDR);
-  reg_writeq(args.driver.filter_per_group, REG_CONV_FILTER_PER_GROUP);
-  reg_writeq(args.driver.channel_per_group, REG_CONV_CHANNEL_PER_GROUP);
-  reg_writeq(args.driver.image_amount_per_row, REG_CONV_IMAGE_AMOUNT_PER_ROW);
-  reg_writeq(args.driver.image_one_pad_per_row, REG_CONV_IMAGE_ONE_PAD_PER_ROW);
-  reg_writeq(args.driver.filter_amount_all, REG_CONV_FILTER_AMOUNT_ALL);
-  reg_writeq(args.driver.output_amount_per_row, REG_CONV_RESULT_AMOUNT_PER_ROW);
-  reg_writeq(args.driver.image_block_amount_per_row, 0xca8);
-  reg_writeq(args.driver.filter_pad_width_mul_channel, 0xcb0);
-  reg_writeq(args.driver.image_amount_per_row_multi_win_first, 0xcb8);
-  reg_writeq(args.driver.image_amount_per_row_multi_win, 0xcc0);
-  reg_writeq(args.driver.image_block_num, 0xcc8);
-  reg_writeq(args.driver.image_block_len, 0xcd0);
-  reg_writeq(args.driver.image_block_len_last, 0xcd8);
-  reg_writeq(args.driver.image_win_cnt, 0xce0);
-  reg_writeq(args.driver.image_win_cnt_last, 0xce8);
-  reg_writeq(args.driver.res_row_data_align4_pad, 0xcf8);
-  reg_writeq(args.driver.prog_full_cnt, 0xd08);
-  reg_writeq(args.driver.post_prog_full_cnt, 0xd10);
-  reg_writeq(args.driver.deconv_param, 0xd18);
-  reg_writeq(args.driver.fpga_bias_scale_len / 4, 0xd20);
+  // new
+  reg_writeq((args.driver.row_padding_down << 45) |
+                 (args.driver.row_padding_up << 34) |
+                 (args.driver.col_padding_down << 17) |
+                 args.driver.col_padding_up,
+             REG_CONV_REG0);
+  reg_writeq((args.driver.image_win_cnt_last << 50) |
+                 (args.driver.image_win_cnt << 39) |
+                 (args.driver.image_block_amount_per_row << 20) |
+                 args.driver.filter_pad_width_mul_channel,
+             REG_CONV_REG1);
+  reg_writeq((args.driver.stride_h << 48) | (args.driver.skip_window << 28) |
+                 (args.driver.filter_row << 8) |
+                 (args.driver.filter_height << 4) | args.driver.filter_width,
+             REG_CONV_REG2);
+  reg_writeq((args.driver.filter_num << 42) |
+                 (args.driver.filter_align << 26) |
+                 (args.driver.prog_full_cnt << 16) |
+                 args.driver.filter_amount_all,
+             REG_CONV_REG3);
+  reg_writeq((args.driver.post_prog_full_cnt << 54) |
+                 (args.driver.last_cal_res_row_num << 50) |
+                 (args.driver.cal_res_num << 39) |
+                 (args.driver.res_row_data_align4_pad << 35) |
+                 (args.driver.output_amount_per_row << 16) |
+                 args.driver.output_width,
+             REG_CONV_REG4);
+  reg_writeq((args.driver.deconv_dump << 40) | (args.driver.deconv_ena << 39) |
+                 (args.driver.deconv_res_skip_row << 7) |
+                 args.driver.deconv_skip_row,
+             REG_CONV_REG5);
+  reg_writeq((args.driver.result_amount_per_row_multi_para << 43) |
+                 (args.driver.output_height << 32) |
+                 args.driver.output_address_phy,
+             REG_CONV_REG6);
+  reg_writeq((args.driver.filter_amount_whole << 48) |
+                 (args.driver.fpga_bias_scale_len << 32) |
+                 args.driver.sb_address_phy,
+             REG_CONV_REG7);
+  reg_writeq((args.driver.filters_amount_whole << 32) |
+                 args.driver.filter_address_phy,
+             REG_CONV_REG8);
+  reg_writeq((args.driver.image_amount_per_row << 43) |
+                 (args.driver.image_hight << 32) |
+                 args.driver.image_address_phy,
+             REG_CONV_REG9);
+  reg_writeq((args.driver.filter_pad_hight << 46) |
+                 (args.driver.image_amount_per_row_multi_win << 23) |
+                 args.driver.image_amount_per_row_multi_win_first,
+             REG_CONV_REG10);
+  reg_writeq((args.driver.image_block_num << 48) |
+                 (args.driver.image_block_len << 24) |
+                 args.driver.image_block_len_last,
+             REG_CONV_REG11);
 
   reg_writeq(args.driver.cmd, REG_CONV_CMD);
 
   if (0 != fpga_regpoll(REG_INTERRUPT, INTERRUPT_CONV, PE_IRQ_TIMEOUT)) {
     g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status = ERROR;
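The rewritten block programs the new REG_CONV_REG0..REG_CONV_REG11 words, each of which packs several driver fields into one 64-bit value with shifts and ORs before a single reg_writeq call. A hedged illustration of the packing pattern, using the REG_CONV_REG0 layout shown above (the helper and the field widths implied by the shift amounts are assumptions for illustration, not part of the driver API):

  #include <cstdint>

  // Illustrative packing of one 64-bit conv register, mirroring the REG_CONV_REG0
  // expression above: row_padding_down<<45 | row_padding_up<<34 |
  // col_padding_down<<17 | col_padding_up.
  static uint64_t pack_reg0(uint64_t row_padding_down, uint64_t row_padding_up,
                            uint64_t col_padding_down, uint64_t col_padding_up) {
    return (row_padding_down << 45) | (row_padding_up << 34) |
           (col_padding_down << 17) | col_padding_up;
  }

  int main() {
    // Each field must stay within its bit window (e.g. col_padding_up in bits
    // 0..16 here), otherwise it would overlap the neighbouring field; the
    // hardware-side widths are only inferred from the shift amounts.
    uint64_t reg0 = pack_reg0(3, 1, 64, 64);
    return reg0 != 0 ? 0 : 1;
  }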
@@ -307,12 +328,7 @@ int ComputeBasicConv(const struct ConvArgs &args) {
     DLOG << "Conv Wait Irq Timeout!";
     PADDLE_MOBILE_ENFORCE(0, "Conv Wait Irq Timeout");
   }
-  output_scale = reg_readq(REG_SCALE_PARAMETER);
-  output_scale = (output_scale << 32) | (output_scale >> 32);
-  fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
-  active_args.activation_type = NONE;
-  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
+  DLOG << "after reg poll";
 
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);

@@ -350,22 +366,22 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
   uint64_t image_physical_address = 0;
   uint64_t output_physical_address = 0;
-  uint64_t reg_ActivationArgs = 0;
+  //  uint64_t reg_ActivationArgs = 0;
   // active function:{none,leakeyrelu,sigmoid,tanh}
-  ActivationArgs active_args;
+  //  ActivationArgs active_args;
   // active_args.activation_type = LEAKYRELU;
-  active_args.activation_type = args.output.activation.activation_type;
-  active_args.leaky_relu_negative_slope =
-      args.output.activation.leaky_relu_negative_slope;
-  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
-                       active_args.leaky_relu_negative_slope;
-  DLOG << " activation_type:" << active_args.activation_type
-       << " leaky_relu_negative_slope:"
-       << active_args.leaky_relu_negative_slope;
-  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
+  //  active_args.activation_type = args.output.activation.activation_type;
+  //  active_args.leaky_relu_negative_slope =
+  //      args.output.activation.leaky_relu_negative_slope;
+  //  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+  //                       active_args.leaky_relu_negative_slope;
+  //  DLOG << " activation_type:" << active_args.activation_type
+  //       << " leaky_relu_negative_slope:"
+  //       << active_args.leaky_relu_negative_slope;
+  //  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
   image_physical_address = vaddr_to_paddr_driver(args.image.address);
   output_physical_address = vaddr_to_paddr_driver(args.output.address);

@@ -417,10 +433,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
     return ret;
   }
-  reg_writeq(reg_ActivationArgs,
-             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
-  reg_writeq(output_scale, REG_SCALE_PARAMETER);
+  //  reg_writeq(reg_ActivationArgs,
+  //             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
+  //  reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
   reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);
   reg_writeq(

@@ -462,12 +478,12 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
   DLOG << "after reg poll";
   // *(args.output.scale_address) = reg_readq(REG_SCALE_PARAMETER);
-  output_scale = reg_readq(REG_SCALE_PARAMETER);
-  output_scale = (output_scale << 32) | (output_scale >> 32);
-  fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
-  active_args.activation_type = NONE;
-  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
+  //  output_scale = reg_readq(REG_SCALE_PARAMETER);
+  //  output_scale = (output_scale << 32) | (output_scale >> 32);
+  //  fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  //  active_args.activation_type = NONE;
+  //  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
 
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);

@@ -479,7 +495,7 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
 int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "=============ComputeFpgaEWAdd===========";
-  //  DLOG << " relu_enabled:" << args.relu_enabled
+  DLOG << " relu_enabled:" << args.relu_enabled;
   DLOG << " const0:" << fp16_2_fp32(int16_t(args.const0))
        << " const1:" << fp16_2_fp32(int16_t(args.const1));
   DLOG << " image0_address:" << args.image0.address

@@ -503,17 +519,17 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
   int ret = 0;
   uint64_t output_scale = 0;
-  uint64_t reg_ActivationArgs = 0;
-  ActivationArgs active_args;
-  active_args.activation_type = args.output.activation.activation_type;
-  active_args.leaky_relu_negative_slope =
-      args.output.activation.leaky_relu_negative_slope;
-  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
-                       active_args.leaky_relu_negative_slope;
-  DLOG << " activation_type:" << active_args.activation_type
-       << " leaky_relu_negative_slope:"
-       << active_args.leaky_relu_negative_slope;
-  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
+  //  uint64_t reg_ActivationArgs = 0;
+  //  ActivationArgs active_args;
+  //  active_args.activation_type = args.output.activation.activation_type;
+  //  active_args.leaky_relu_negative_slope =
+  //      args.output.activation.leaky_relu_negative_slope;
+  //  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+  //                       active_args.leaky_relu_negative_slope;
+  //  DLOG << " activation_type:" << active_args.activation_type
+  //       << " leaky_relu_negative_slope:"
+  //       << active_args.leaky_relu_negative_slope;
+  //  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
 
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) {

@@ -523,8 +539,8 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
     return ret;
   }
-  reg_writeq(reg_ActivationArgs,
-             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
+  //  reg_writeq(reg_ActivationArgs,
+  //             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(args.driver.image0_address_phy, REG_EW_IMAGE0_BASE_ADDR);

@@ -543,11 +559,11 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
     PADDLE_MOBILE_ENFORCE(0, "EW Wait Irq Timeout!");
   }
-  output_scale = reg_readq(REG_SCALE_PARAMETER);
-  output_scale = (output_scale << 32) | (output_scale >> 32);
-  fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
-  active_args.activation_type = NONE;
-  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
+  //  output_scale = reg_readq(REG_SCALE_PARAMETER);
+  //  output_scale = (output_scale << 32) | (output_scale >> 32);
+  //  fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  //  active_args.activation_type = NONE;
+  //  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
src/fpga/common/fpga_common.cpp

@@ -200,10 +200,10 @@ uint64_t vaddr_to_paddr(void *address) {
 }
 
 uint32_t paddle_mobile_version() {
-  uint32_t v_master = 35;
-  uint32_t v_slave = 35;
+  uint32_t v_master = 52;
+  uint32_t v_slave = 52;
 
-  uint32_t first = 1, second = 2, fourth_master = 1, fourth_slave = 2;
+  uint32_t first = 1, second = 2, fourth_master = 1, fourth_slave = 1;
   uint32_t master = first << 24 | second << 16 | v_master << 8 | fourth_master;
   uint32_t slave = first << 24 | second << 16 | v_slave << 8 | fourth_slave;
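paddle_mobile_version() packs four byte-sized components into one 32-bit word, so the change above moves the reported version from 1.2.35.x to 1.2.52.1 for both master and slave. A one-line sanity check of that packing (illustrative only):

  #include <cstdint>

  int main() {
    uint32_t first = 1, second = 2, v_master = 52, fourth_master = 1;
    uint32_t master = first << 24 | second << 16 | v_master << 8 | fourth_master;
    // 0x01023401 reads as 1.2.52.1 byte by byte from the top.
    return master == 0x01023401u ? 0 : 1;
  }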
src/fpga/common/fpga_common.h

@@ -32,8 +32,12 @@ limitations under the License. */
 #define FILTER_NUM_ALIGNMENT (32)      // Filter number aligned to 32
 #define FILTER_ELEMENT_ALIGNMENT (16)  // Filter element number aligned to 16
 #define BS_NUM_ALIGNMENT (8)
+#define BIAS_SCALE_DMA_NUM (4)
+#define RESULT_ALIGNMENT (32)
+#define PE_COLUMN (8)
+#define ROW_PARALLEL_NUM (2)
 #define BIAS_NUM_ALIGNMENT (16)
-#define ROW_PARALLEL_NUM (3)
 #endif
 
 namespace paddle_mobile {

@@ -89,37 +93,59 @@ struct ImageOutputArgs {
 };
 
 struct ConvDriverParam {
-  uint64_t image_address_phy;
-  uint64_t filter_address_phy;
-  uint64_t sb_address_phy;
-  uint64_t output_address_phy;
-  uint64_t output_height;
-  uint64_t output_width;
   uint64_t filter_per_group;
   uint64_t channel_per_group;
-  uint64_t image_amount_per_row;
   uint64_t image_one_pad_per_row;
-  uint64_t filter_amount_all;
-  uint64_t deconv_param;
-  uint64_t output_amount_per_row;
+  uint64_t col_padding_up;
+  uint64_t col_padding_down;
+  uint64_t row_padding_up;
+  uint64_t row_padding_down;
   uint64_t image_block_amount_per_row;
   uint64_t filter_pad_width_mul_channel;
-  uint64_t image_amount_per_row_multi_win_first;
-  uint64_t image_amount_per_row_multi_win;
-  uint64_t image_block_num;
-  uint64_t image_block_len;
-  uint64_t image_block_len_last;
   uint64_t image_win_cnt;
   uint64_t image_win_cnt_last;
-  uint64_t res_row_data_align4_pad;
+  uint64_t filter_row;
+  uint64_t filter_width;
+  uint64_t filter_height;
+  uint64_t skip_window;
+  uint64_t stride_h;
+  uint64_t filter_amount_all;
   uint64_t prog_full_cnt;
+  uint64_t filter_align;
+  uint64_t filter_num;
+  uint64_t output_width;
+  uint64_t output_amount_per_row;
+  uint64_t res_row_data_align4_pad;
+  uint64_t cal_res_num;
+  uint64_t last_cal_res_row_num;
   uint64_t post_prog_full_cnt;
+  uint64_t deconv_skip_row;      // paralvl*deconv_group
+  uint64_t deconv_res_skip_row;  // deconv_group * result_amount_per_row
+  uint64_t deconv_ena;
+  uint64_t deconv_dump;
+  uint64_t output_address_phy;
+  uint64_t output_height;
+  uint64_t result_amount_per_row_multi_para;
+  uint64_t sb_address_phy;
   uint64_t fpga_bias_scale_len;
-  uint64_t cmd;
+  uint64_t filter_amount_whole;
+  uint64_t filter_address_phy;
+  uint64_t filters_amount_whole;
+  uint64_t image_address_phy;
+  uint64_t image_hight;
+  uint64_t image_amount_per_row;
+  uint64_t image_amount_per_row_multi_win_first;
+  uint64_t image_amount_per_row_multi_win;
+  uint64_t filter_pad_hight;
+  uint64_t image_block_num;
+  uint64_t image_block_len;
+  uint64_t image_block_len_last;
+  uint64_t deconv_param;
+  uint64_t cmd;
 };
 
 struct EWAddDriverParam {

@@ -141,6 +167,7 @@ struct DeconvTxParm {
 };
 
 struct ConvArgs {
+  bool relu_enabled;
   void *sb_address;  // scale and bias
   void *filter_address;
   float *filter_scale_address;

@@ -209,6 +236,7 @@ struct PoolingArgs {
 };
 
 struct EWAddArgs {
+  bool relu_enabled;
   uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
   uint32_t const1;
   struct ImageInputArgs image0;

@@ -238,6 +266,7 @@ struct DeconvArgs {
 };
 
 struct DWconvArgs {
   uint32_t sub_conv_num;
+  bool relu_enabled;
   void *bias_address;
   void *filter_address;
   struct KernelArgs kernel;
src/framework/executor.cpp

@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "framework/executor.h"
 #include <algorithm>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 #include "common/enforce.h"

@@ -638,7 +639,8 @@ std::map<std::string, float> LoadQuantValFromFile(std::string filename) {
   std::ifstream in;
   in.open(filename, std::ios::in);
   if (!in.is_open()) {
-    std::cout << "open File Failed." << std::endl;
+    //    std::cout << "open File Failed." << std::endl;
+    DLOG << "open File Failed.";
     exit(-1);
   }
src/operators/kernel/fpga/V2/conv_add_bn_kernel.cpp

@@ -22,6 +22,7 @@ namespace operators {
 
 template <>
 bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
+  bool relu_enabled = false;
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::NONE;
   int16_t leaky_relu_negative_slope = 0;

@@ -34,7 +35,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   auto out = param->Output();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
@@ -64,10 +65,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
...
@@ -64,10 +65,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
fpga
::
format_conv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
());
fpga
::
format_conv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
());
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
activation_enable
,
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
relu_enabled
,
leaky_relu_negative_slope
,
param
->
Groups
()
,
param
->
Groups
(),
param
->
Strides
()[
0
]
,
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
conv_arg
);
param
->
SetFpgaArgs
(
conv_arg
);
delete
new_scale
;
delete
new_scale
;
...
...
src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp

@@ -23,9 +23,9 @@ namespace operators {
 template <>
 bool ConvAddBNReluKernel<FPGA, float>::Init(
     FusionConvAddBNReluParam<FPGA> *param) {
-  paddle_mobile::fpga::ActivationType activation_enable =
-      paddle_mobile::fpga::LEAKYRELU;
-  int16_t leaky_relu_negative_slope = 0;
+  bool relu_enabled = true;
+  //  paddle_mobile::fpga::ActivationType activation_enable =
+  //      paddle_mobile::fpga::LEAKYRELU;
   auto input = const_cast<LoDTensor *>(param->Input());
   auto bias = param->Bias();
   auto bias_ptr = bias->data<float>();

@@ -34,7 +34,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   const int groups = param->Groups();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   vector<int> paddings = param->Paddings();
   vector<int> strides = param->Strides();
   auto bn_mean_ptr = param->InputMean()->data<float>();

@@ -70,17 +70,17 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   if (groups == channel) {
     fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
     fpga::DWconvArgs dwconv_arg = {0};
-    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable,
-                          leaky_relu_negative_slope, strides[0], strides[1],
-                          paddings[0], paddings[1], new_bias_ptr);
+    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, relu_enabled,
+                          strides[0], strides[1], paddings[0], paddings[1],
+                          new_bias_ptr);
     param->SetFpgaArgs(dwconv_arg);
     fpga::fpga_free(bs_ptr);
   } else {
     fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
     fpga::SplitConvArgs conv_arg = {0};
-    fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
-                         leaky_relu_negative_slope, param->Groups(), strides[0],
-                         strides[1], paddings[0], paddings[1], bs_ptr);
+    fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
+                         param->Groups(), strides[0], strides[1], paddings[0],
+                         paddings[1], bs_ptr);
     param->SetFpgaArgs(conv_arg);
   }
   delete new_scale;
src/operators/kernel/fpga/V2/conv_add_kernel.cpp

@@ -31,7 +31,7 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
   auto out = param->Output();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");

@@ -45,8 +45,7 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
 
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
-                       leaky_relu_negative_slope, param->Groups(),
+  fpga::fill_split_arg(&conv_arg, input, out, filter, false, param->Groups(),
                        param->Strides()[0], param->Strides()[1],
                        param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
src/operators/kernel/fpga/V2/conv_add_relu_kernel.cpp
浏览文件 @
a1cc931d
...
@@ -31,7 +31,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
...
@@ -31,7 +31,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
auto
out
=
param
->
Output
();
auto
out
=
param
->
Output
();
float
Si
=
input
->
scale
[
0
];
float
Si
=
input
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
So
=
out
->
scale
[
0
];
float
Sf
=
fpga
::
filter_find_max
(
filter
)
/
127
;
float
Sf
=
fpga
::
filter_find_max
(
filter
);
PADDLE_MOBILE_ENFORCE
(
out
->
dims
()[
1
]
==
bias
->
dims
()[
0
],
PADDLE_MOBILE_ENFORCE
(
out
->
dims
()[
1
]
==
bias
->
dims
()[
0
],
"Output channel should be equal to bias number"
);
"Output channel should be equal to bias number"
);
...
@@ -45,8 +45,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
...
@@ -45,8 +45,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
fpga
::
format_conv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
());
fpga
::
format_conv_data
(
filter
,
out
,
&
bs_ptr
,
param
->
Groups
());
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
SplitConvArgs
conv_arg
=
{
0
};
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
activation_enable
,
fpga
::
fill_split_arg
(
&
conv_arg
,
input
,
out
,
filter
,
true
,
param
->
Groups
(),
leaky_relu_negative_slope
,
param
->
Groups
(),
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Strides
()[
0
],
param
->
Strides
()[
1
],
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
Paddings
()[
0
],
param
->
Paddings
()[
1
],
bs_ptr
);
param
->
SetFpgaArgs
(
conv_arg
);
param
->
SetFpgaArgs
(
conv_arg
);
...
src/operators/kernel/fpga/V2/conv_bn_kernel.cpp  View file @ a1cc931d
...
@@ -30,7 +30,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   auto out = param->Output();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
   auto bn_scale_ptr = param->InputScale()->data<float>();
...
@@ -56,8 +56,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, false, param->Groups(),
-                       param->Strides()[0], param->Strides()[1],
-                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
...
src/operators/kernel/fpga/V2/conv_bn_relu_kernel.cpp  View file @ a1cc931d
...
@@ -29,7 +29,7 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   auto out = param->Output();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
   auto bn_scale_ptr = param->InputScale()->data<float>();
...
@@ -58,17 +58,16 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   if (groups == channel) {
     fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
     fpga::DWconvArgs dwconv_arg = {0};
-    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, true,
-                          param->Strides()[0], param->Strides()[1],
-                          param->Paddings()[0], param->Paddings()[1],
-                          new_bias_ptr);
+    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Strides()[0],
+                          param->Strides()[1], param->Paddings()[0],
+                          param->Paddings()[1], new_bias_ptr);
     param->SetFpgaArgs(dwconv_arg);
     fpga::fpga_free(bs_ptr);
   } else {
     fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
     fpga::SplitConvArgs conv_arg = {0};
-    fpga::fill_split_arg(&conv_arg, input, out, filter, true, param->Groups(),
-                         param->Strides()[0], param->Strides()[1],
-                         param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                         leaky_relu_negative_slope, param->Groups(),
+                         param->Strides()[0], param->Strides()[1],
+                         param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(conv_arg);
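For the batch-norm fusion kernels, the visible context only names bn_mean_ptr, bn_var_ptr, bn_scale_ptr and the folded new_scale_ptr / new_bias_ptr handed to format_dwconv_data; the folding itself sits in collapsed lines. The textbook folding is sketched below purely as an assumption about what those elided lines compute; none of it is shown in this diff.

#include <cmath>
#include <cstdio>

int main() {
  // Textbook batch-norm folding for one channel. Whether the elided kernel
  // code computes new_scale_ptr/new_bias_ptr exactly this way is an
  // assumption; only the pointer names appear in the visible hunks.
  const float epsilon = 1e-5f;
  float bn_scale = 0.9f, bn_bias = 0.1f, bn_mean = 0.2f, bn_var = 0.04f;
  float new_scale = bn_scale / std::sqrt(bn_var + epsilon);
  float new_bias = bn_bias - bn_mean * new_scale;
  std::printf("new_scale=%g  new_bias=%g\n", new_scale, new_bias);
  return 0;
}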
...
src/operators/kernel/fpga/V2/conv_kernel.cpp  View file @ a1cc931d
...
@@ -29,7 +29,7 @@ bool ConvKernel<FPGA, float>::Init(ConvParam<FPGA> *param) {
   auto out = param->Output();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   int channel = out->dims()[1];
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
...
@@ -40,8 +40,7 @@ bool ConvKernel<FPGA, float>::Init(ConvParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, false, param->Groups(),
-                       param->Strides()[0], param->Strides()[1],
-                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
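conv_kernel.cpp's first hunk also shows the staging buffer for per-channel scale/bias: bs_ptr is allocated as 2 * channel floats. One plausible layout is sketched below; the split into a scale half and a bias half is an assumption, only the allocation size comes from the diff.

#include <cstdio>
#include <cstdlib>

int main() {
  int channel = 4;  // stands in for out->dims()[1] in the kernel
  // Same size as the kernel's fpga::fpga_malloc(2 * channel * sizeof(float)).
  float* bs_ptr =
      static_cast<float*>(std::malloc(2 * channel * sizeof(float)));
  for (int c = 0; c < channel; ++c) {
    bs_ptr[c] = 1.0f;            // assumed: per-channel scale half
    bs_ptr[channel + c] = 0.0f;  // assumed: per-channel bias half
  }
  std::printf("bs_ptr holds %d floats (scale + bias per output channel)\n",
              2 * channel);
  std::free(bs_ptr);
  return 0;
}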
...
src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp  View file @ a1cc931d
...
@@ -31,7 +31,7 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
   auto out = param->Output();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   int channel = out->dims()[1];
...
@@ -58,8 +58,7 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
   fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                              sub_conv_n);
   fpga::DWDeconvArgs DWDeconv_arg = {0};
-  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, false,
-                          param->Strides()[0], param->Strides()[1],
-                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                          activation_enable, leaky_relu_negative_slope,
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(DWDeconv_arg);
...
@@ -70,10 +69,10 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
   }
   fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
   fpga::DeconvArgs deconv_arg = {0};
-  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, false,
-                        param->Groups(), param->Strides()[0],
-                        param->Strides()[1], param->Paddings()[0],
-                        param->Paddings()[1], bs_ptr);
+  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                        leaky_relu_negative_slope, param->Groups(),
+                        param->Strides()[0], param->Strides()[1],
+                        param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(deconv_arg);
   }
   return true;
...
src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp  View file @ a1cc931d
...
@@ -33,7 +33,7 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
   auto out = param->Output();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
...
@@ -61,8 +61,7 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
   fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                              sub_conv_n);
   fpga::DWDeconvArgs DWDeconv_arg = {0};
-  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, false,
-                          param->Strides()[0], param->Strides()[1],
-                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                          activation_enable, leaky_relu_negative_slope,
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(DWDeconv_arg);
...
@@ -73,10 +72,10 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
   }
   fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
   fpga::DeconvArgs deconv_arg = {0};
-  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, false,
-                        param->Groups(), param->Strides()[0],
-                        param->Strides()[1], param->Paddings()[0],
-                        param->Paddings()[1], bs_ptr);
+  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                        leaky_relu_negative_slope, param->Groups(),
+                        param->Strides()[0], param->Strides()[1],
+                        param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(deconv_arg);
   }
   return true;
...
src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp  View file @ a1cc931d
...
@@ -34,7 +34,7 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
   auto out = param->Output();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
...
@@ -62,8 +62,7 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
   fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                              sub_conv_n);
   fpga::DWDeconvArgs DWDeconv_arg = {0};
-  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, true,
-                          param->Strides()[0], param->Strides()[1],
-                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                          activation_enable, leaky_relu_negative_slope,
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(DWDeconv_arg);
...
@@ -74,10 +73,10 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
   }
   fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
   fpga::DeconvArgs deconv_arg = {0};
-  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, true,
-                        param->Groups(), param->Strides()[0],
-                        param->Strides()[1], param->Paddings()[0],
-                        param->Paddings()[1], bs_ptr);
+  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                        leaky_relu_negative_slope, param->Groups(),
+                        param->Strides()[0], param->Strides()[1],
+                        param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(deconv_arg);
   }
   return true;
...
src/operators/kernel/fpga/V2/deconv_add_kernel.cpp  View file @ a1cc931d
...
@@ -33,7 +33,7 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
   auto out = param->Output();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
...
@@ -61,8 +61,7 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
   fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                              sub_conv_n);
   fpga::DWDeconvArgs DWDeconv_arg = {0};
-  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, false,
-                          param->Strides()[0], param->Strides()[1],
-                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                          activation_enable, leaky_relu_negative_slope,
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(DWDeconv_arg);
...
@@ -73,10 +72,10 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
   }
   fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
   fpga::DeconvArgs deconv_arg = {0};
-  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, false,
-                        param->Groups(), param->Strides()[0],
-                        param->Strides()[1], param->Paddings()[0],
-                        param->Paddings()[1], bs_ptr);
+  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                        leaky_relu_negative_slope, param->Groups(),
+                        param->Strides()[0], param->Strides()[1],
+                        param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(deconv_arg);
   }
...
src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp  View file @ a1cc931d
...
@@ -34,7 +34,7 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
   auto out = param->Output();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
...
@@ -57,8 +57,7 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
   fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                              sub_conv_n);
   fpga::DWDeconvArgs DWDeconv_arg = {0};
-  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, true,
-                          param->Strides()[0], param->Strides()[1],
-                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                          activation_enable, leaky_relu_negative_slope,
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(DWDeconv_arg);
...
@@ -69,10 +68,10 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
   }
   fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
   fpga::DeconvArgs deconv_arg = {0};
-  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, true,
-                        param->Groups(), param->Strides()[0],
-                        param->Strides()[1], param->Paddings()[0],
-                        param->Paddings()[1], bs_ptr);
+  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                        leaky_relu_negative_slope, param->Groups(),
+                        param->Strides()[0], param->Strides()[1],
+                        param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(deconv_arg);
   }
   return true;
...
src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp  View file @ a1cc931d
...
@@ -35,7 +35,7 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
   auto out = param->Output();
   float Si = input->scale[0];
   float So = out->scale[0];
-  float Sf = fpga::filter_find_max(filter) / 127;
+  float Sf = fpga::filter_find_max(filter);
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
   auto bn_scale_ptr = param->InputScale()->data<float>();
...
@@ -80,18 +80,17 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
   fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                              sub_conv_n);
   fpga::DWDeconvArgs DWDeconv_arg = {0};
-  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, true,
-                          param->Strides()[0], param->Strides()[1],
-                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+  fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                          activation_enable, leaky_relu_negative_slope,
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(DWDeconv_arg);
   } else {
   fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
   fpga::DeconvArgs deconv_arg = {0};
-  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, true,
-                        param->Groups(), param->Strides()[0],
-                        param->Strides()[1], param->Paddings()[0],
-                        param->Paddings()[1], bs_ptr);
+  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                        leaky_relu_negative_slope, param->Groups(),
+                        param->Strides()[0], param->Strides()[1],
+                        param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(deconv_arg);
   }
   delete new_scale;
...
src/operators/kernel/fpga/V2/feed_kernel.cpp  View file @ a1cc931d
...
@@ -44,7 +44,6 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
   }
   fpga::format_image(input);
   output->ShareDataWith(*input);
-  input->external_data = nullptr;
 }
 template class FeedKernel<FPGA, float>;
...
src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp  View file @ a1cc931d
...
@@ -20,6 +20,7 @@ namespace operators {
 template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
-  bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
   int16_t leaky_relu_negative_slope = 0;
...
@@ -58,8 +59,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   fpga::format_ofm(out);
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
-                       0, 0, bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input_x, out, filter, activation_enable,
+                       leaky_relu_negative_slope, 1, 1, 1, 0, 0, bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
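Both fully-connected kernels keep the FC-as-1x1-convolution mapping (groups = 1, strides = 1, paddings = 0) and only swap the activation pair; FusionFcReluKernel below selects LEAKYRELU with leaky_relu_negative_slope = 0. A quick sketch of why a zero slope still gives plain-ReLU behaviour; the float model here is illustrative only, since the int16_t slope encoding used by the hardware is not shown in this diff.

#include <algorithm>
#include <cstdio>
#include <initializer_list>

// Leaky ReLU with a zero negative slope degenerates to a plain ReLU, which is
// presumably why the FC-relu kernel can pass LEAKYRELU with slope 0.
static float leaky_relu(float x, float negative_slope) {
  return x > 0.0f ? x : negative_slope * x;
}

int main() {
  for (float x : {-2.0f, -0.5f, 0.0f, 3.0f}) {
    std::printf("x=%g  leaky_relu(x,0)=%g  relu(x)=%g\n", x,
                leaky_relu(x, 0.0f), std::max(x, 0.0f));
  }
  return 0;
}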
...
src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp  View file @ a1cc931d
...
@@ -20,6 +20,7 @@ namespace operators {
 template <>
 bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
-  bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
   int16_t leaky_relu_negative_slope = 0;
...
@@ -58,8 +59,8 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   fpga::format_ofm(out);
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
-                       0, 0, bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input_x, out, filter, activation_enable,
+                       leaky_relu_negative_slope, 1, 1, 1, 0, 0, bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }