Commit 71f88533
Authored September 10, 2018 by xiebaiyuan

Merge remote-tracking branch 'upstream/develop' into develop

Parents: 10da6491, 937ecbb6
Showing 25 changed files with 705 additions and 354 deletions (+705 -354)
CMakeLists.txt (+10 -3)
src/fpga/api.cpp (+70 -43)
src/fpga/api.h (+28 -3)
src/fpga/image.cpp (+4 -0)
src/fpga/image.h (+7 -0)
src/framework/dim.h (+1 -0)
src/operators/feed_op.h (+1 -1)
src/operators/kernel/arm/dropout_kernel.cpp (+7 -3)
src/operators/kernel/fpga/concat_kernel.cpp (+33 -20)
src/operators/kernel/fpga/conv_add_bn_kernel.cpp (+73 -26)
src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp (+43 -24)
src/operators/kernel/fpga/conv_add_relu_kernel.cpp (+40 -23)
src/operators/kernel/fpga/conv_bn_kernel.cpp (+41 -25)
src/operators/kernel/fpga/conv_bn_relu_kernel.cpp (+42 -24)
src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp (+12 -12)
src/operators/kernel/fpga/fc_relu_kernel.cpp (+50 -37)
src/operators/kernel/fpga/fusion_fc_kernel.cpp (+53 -33)
src/operators/kernel/fpga/pool_kernel.cpp (+11 -11)
src/operators/kernel/fpga/softmax_kernel.cpp (+2 -2)
src/operators/math/gemm.cpp (+3 -1)
src/operators/op_param.h (+43 -63)
test/CMakeLists.txt (+11 -0)
test/net/test_alexnet.cpp (+59 -0)
test/net/test_inceptionv4.cpp (+59 -0)
test/test_helper.h (+2 -0)
CMakeLists.txt
 cmake_minimum_required(VERSION 3.6)
 
-option(USE_OPENMP "openmp support" ON)
+option(USE_OPENMP "openmp support" OFF)
 project(paddle-mobile)
 
-option(DEBUGING "enable debug mode" OFF)
+option(DEBUGING "enable debug mode" ON)
 option(USE_EXCEPTION "use std exception" OFF)
 option(LOG_PROFILE "log profile" OFF)
 # select the platform to build
...
@@ -94,6 +94,8 @@ else()
 endif()
 
 if(FPGA)
+  set(DEBUGING ON)
+  add_definitions(-DPADDLE_MOBILE_DEBUG)
   add_definitions(-DPADDLE_MOBILE_FPGA)
 else()
   file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc)
...
@@ -140,7 +142,12 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 
 # NET default
-set(NET "default" CACHE STRING "select net type")
+if(FPGA)
+  set(NET "FPGAnets" CACHE STRING "select net type")
+else()
+  set(NET "default" CACHE STRING "select net type")
+endif()
 set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGAnets" "NLP")
 include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
...
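Taken together, the FPGA branch is now self-configuring: enabling it switches on DEBUGING, defines PADDLE_MOBILE_DEBUG and PADDLE_MOBILE_FPGA, and selects the "FPGAnets" NET type so that tools/op.cmake registers the FPGA operator set. Assuming a standard out-of-tree build, a single configure line such as `cmake -DFPGA=ON ..` should pick all of this up without further flags.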
src/fpga/api.cpp
...
@@ -68,29 +68,35 @@ void fpga_copy(void *dest, const void *src, size_t num) {
   memcpy(dest, src, num);
 }
 
-int ComputeFpgaConv(const struct ConvArgs &args) {
+int ComputeFpgaConv(const struct WrapperConvArgs &args) {
 #ifdef FPGA_TEST_MODE
-  DLOG << " relu_enabled:" << args.relu_enabled
-       << " sb_address:" << args.sb_address
-       << " filter_address:" << args.filter_address
-       << " filter_num:" << args.filter_num
-       << " group_num:" << args.group_num;
-  DLOG << " image_address:" << args.image.address
-       << " image_scale_address:" << args.image.scale_address
-       << " image_channels:" << args.image.channels
-       << " image_height:" << args.image.height
-       << " image_width:" << args.image.width
-       << " pad_height:" << args.image.pad_height
-       << " pad_width:" << args.image.pad_width;
-  DLOG << " kernel_height:" << args.kernel.height
-       << " kernel_width:" << args.kernel.width
-       << " stride_h:" << args.kernel.stride_h
-       << " stride_w:" << args.kernel.stride_w;
-  DLOG << " out_address:" << args.output.address
-       << " out_scale_address:" << args.output.scale_address;
+  /*
+  DLOG << " relu_enabled:" << args.relu_enabled
+       << " sb_address:" << args.sb_address
+       << " filter_address:" << args.filter_address
+       << " filter_num:" << args.filter_num
+       << " group_num:" << args.group_num;
+  DLOG << " image_address:" << args.image.address
+       << " image_scale_address:" << args.image.scale_address
+       << " image_channels:" << args.image.channels
+       << " image_height:" << args.image.height
+       << " image_width:" << args.image.width
+       << " pad_height:" << args.image.pad_height
+       << " pad_width:" << args.image.pad_width;
+  DLOG << " kernel_height:" << args.kernel.height
+       << " kernel_width:" << args.kernel.width
+       << " stride_h:" << args.kernel.stride_h
+       << " stride_w:" << args.kernel.stride_w;
+  DLOG << " out_address:" << args.output.address
+       << " out_scale_address:" << args.output.scale_address;*/
 #endif
+  int split_num = args.split_num;
+  for (int i = 0; i < split_num; i++) {
+    do_ioctl(IOCTL_CONFIG_CONV, &args.conv_args[i]);
+  }
-  return do_ioctl(IOCTL_CONFIG_CONV, &args);
+  if (split_num > 1) {
+    ComputeFPGAConcat(args.concat_arg);
+  }
 }
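The conv entry point therefore no longer issues a single ioctl: a convolution whose filters exceed one division's capacity is dispatched as split_num IOCTL_CONFIG_CONV calls, one per pre-sliced ConvArgs, and when more than one split ran, ComputeFPGAConcat stitches the partial outputs back together along the channel axis using the concat_arg prepared at kernel-init time.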
 
 int ComputeFpgaPool(const struct PoolingArgs &args) {
...
@@ -155,9 +161,16 @@ int PerformBypass(const struct BypassArgs &args) {
   return do_ioctl(IOCTL_CONFIG_BYPASS, &args);
 }
 
+int ComputeFPGAConcat(const struct ConcatArgs &args) {
+  image::concat_images(args.images_in, args.scales_in, args.image_out,
+                       args.scale_out, args.image_num, args.channel_num,
+                       args.height, args.width);
+  return 0;
+}
 
 void format_image(framework::Tensor *image_tensor) {
   auto dims = image_tensor->dims();
-  int channel = dims[1], height = dims[2], width = dims[3];
+  auto channel = dims[1], height = dims[2], width = dims[3];
   auto data_ptr = image_tensor->mutable_data<float>();
   size_t memory_size = channel * height * width * sizeof(float);
   float *new_data = (float *)fpga_malloc(memory_size);
...
@@ -168,7 +181,7 @@ void format_image(framework::Tensor *image_tensor) {
 void format_ofm(framework::Tensor *ofm_tensor) {
   auto dims = ofm_tensor->dims();
-  int channel = dims[1], height = dims[2], width = dims[3];
+  auto channel = dims[1], height = dims[2], width = dims[3];
   size_t memory_size =
       height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(half);
   ofm_tensor->reset_data_ptr(fpga_malloc(memory_size));
...
@@ -178,38 +191,38 @@ float filter_find_max(framework::Tensor *filter_tensor) {
   auto filter_ptr = filter_tensor->data<float>();
   return filter::find_max(filter_ptr, filter_tensor->numel());
 }
 
+int get_plit_num(framework::Tensor *filter_tensor) {
+  auto dims = filter_tensor->dims();
+  auto chw = dims[1] * dims[2] * dims[3];
+  auto num = dims[0];
+  int div_capacity = filter::calc_division_capacity(chw);
+  return filter::calc_split_num(num, div_capacity);
+}
+
 int get_element_num_per_div(framework::Tensor *filter_tensor, int group_num) {
   auto dims = filter_tensor->dims();
-  PADDLE_MOBILE_ENFORCE(dims.size() == 4 || dims.size() == 2,
-                        "Filter order should be 4 or 2");
-  int chw = dims.size() == 4 ? dims[1] * dims[2] * dims[3] : dims[1];
-  int num = dims.size() == 4 ? dims[0] : dims[1];
+  auto chw = dims[1] * dims[2] * dims[3];
+  auto num = dims[0];
   int div_capacity = filter::calc_division_capacity(chw);
   return filter::calc_num_per_div(num, group_num, div_capacity);
 }
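The split arithmetic is easiest to see with concrete numbers. The sketch below mimics how the per-split filter counts used by the FPGA conv kernels later in this commit fall out of these helpers; element_num_per_div and split_num are assumed values, since calc_division_capacity is hardware-specific and not part of this diff.

#include <cstdio>

int main() {
  // Assumed numbers for illustration only.
  int channel = 100;             // total filters (output channels)
  int element_num_per_div = 32;  // filters one division can hold
  int n = 4;                     // split_num = ceil(100 / 32)
  for (int i = 0; i < n; i++) {
    // Every split but the last handles a full division; the last split
    // takes the remainder (the real code additionally rounds it up with
    // fpga::get_aligned_filter_num).
    int filter_num = i == n - 1 ? channel - (n - 1) * element_num_per_div
                                : element_num_per_div;
    printf("split %d -> %d filters\n", i, filter_num);  // 32, 32, 32, 4
  }
  return 0;
}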
-void format_filter(framework::Tensor *filter_tensor, float max_value,
-                   int group_num) {
-  auto dims = filter_tensor->dims();
-  int num = dims[0], channel = dims[1], height = dims[2], width = dims[3];
-  auto data_ptr = filter_tensor->mutable_data<float>();
-  size_t memory_size = num * channel * height * width * sizeof(float);
-  float *new_data = (float *)fpga_malloc(memory_size);
-  fpga_copy(new_data, data_ptr, memory_size);
-  filter::format_filter(&new_data, num, channel, height, width, group_num,
-                        max_value);
-  filter_tensor->reset_data_ptr(new_data);
-}
+int get_aligned_filter_element_num(int chw) {
+  return align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
+}
+
+int get_aligned_filter_num(int num) {
+  return align_to_x(num, FILTER_NUM_ALIGNMENT);
+}
 
-void format_fc_matrix(framework::Tensor *filter_tensor, float max_value,
-                      int group_num, int height, int width) {
+void format_filter(framework::Tensor *filter_tensor, float max_value,
+                   int group_num) {
   auto dims = filter_tensor->dims();
-  PADDLE_MOBILE_ENFORCE(height == 1 && width == 1,
-                        "IFM should be flattened for FC");
-  int num = dims[1], channel = dims[0] / height / width;
+  auto num = dims[0], channel = dims[1], height = dims[2], width = dims[3];
   auto data_ptr = filter_tensor->mutable_data<float>();
   size_t memory_size = num * channel * height * width * sizeof(float);
-  float *new_data = (float *)fpga_malloc(memory_size);
+  auto new_data = (float *)fpga_malloc(memory_size);
   fpga_copy(new_data, data_ptr, memory_size);
   filter::format_filter(&new_data, num, channel, height, width, group_num,
                         max_value);
...
@@ -222,5 +235,19 @@ void format_bias_scale_array(float **bias_scale_array,
                              element_num_per_division, num);
 }
 
+void format_concat_output(framework::Tensor *out, int height, int width,
+                          int image_num, uint32_t *channel_num) {
+  int sum_channel = 0, sum_cw = 0;
+  for (int i = 0; i < image_num; i++) {
+    sum_channel += channel_num[i];
+  }
+
+  sum_cw = align_to_x(width * sum_channel, IMAGE_ALIGNMENT);
+  auto data_ptr = fpga_malloc(height * sum_cw * sizeof(half));
+  auto ddim = framework::make_ddim({-1, sum_channel, height, width});
+  out->Resize(ddim);
+  out->reset_data_ptr(data_ptr);
+}
+
 }  // namespace fpga
 }  // namespace paddle_mobile
src/fpga/api.h
...
@@ -92,6 +92,26 @@ struct ConvArgs {
   struct ImageOutputArgs output;
 };
 
+struct ConcatArgs {
+  uint32_t image_num;
+  half **images_in;
+  float **scales_in;
+  void *image_out;
+  float *scale_out;
+  uint32_t *channel_num;
+  uint32_t height;
+  uint32_t width;
+};
+
+struct WrapperConvArgs {
+  uint32_t split_num;
+  uint32_t group_num;
+  uint32_t filter_num;
+  struct ImageOutputArgs output;
+  struct ConvArgs *conv_args;
+  struct ConcatArgs concat_arg;
+};
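WrapperConvArgs is the host-side plan for one (possibly split) convolution: conv_args points at split_num per-division ConvArgs entries, each carrying its own slice of the filter and bias/scale data, while concat_arg records where each slice's output image lands so the slices can be concatenated channel-wise into the final output described by output.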
 struct PoolingArgs {
   struct KernelArgs kernel;
   struct ImageInputArgs image;  // input image;
...
@@ -165,21 +185,26 @@ enum FPGA_ERR_TYPE {
 //============================== API =============================
 
 int PerformBypass(const struct BypassArgs &args);
-int ComputeFpgaConv(const struct ConvArgs &args);
+int ComputeFpgaConv(const struct WrapperConvArgs &args);
 int ComputeFpgaPool(const struct PoolingArgs &args);
 int ComputeFpgaEWAdd(const struct EWAddArgs &args);
+int ComputeFPGAConcat(const struct ConcatArgs &args);
 
 static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; }
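align_to_x is the usual round-up-to-a-multiple idiom; a minimal self-contained check of the arithmetic:

#include <cassert>

// Same arithmetic as fpga::align_to_x: round num up to a multiple of x.
static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; }

int main() {
  assert(align_to_x(3, 16) == 16);   // below one alignment unit -> 16
  assert(align_to_x(16, 16) == 16);  // already aligned -> unchanged
  assert(align_to_x(17, 16) == 32);  // just past a boundary -> next multiple
  return 0;
}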
 void format_image(framework::Tensor *image_tensor);
 void format_ofm(framework::Tensor *ofm_tensor);  // only allocate memory
 float filter_find_max(framework::Tensor *filter_tensor);
 int get_element_num_per_div(framework::Tensor *filter_tensor, int group_num);
+int get_plit_num(framework::Tensor *filter_tensor);
+int get_aligned_filter_element_num(int chw);
+int get_aligned_filter_num(int num);
 void format_filter(framework::Tensor *filter_tensor, float max_value,
                    int group_num);
 void format_fc_matrix(framework::Tensor *filter_tensor, float max_value,
                       int group_num, int height = 1, int width = 1);
 void format_bias_scale_array(float **bias_scale_array,
                              int element_num_per_division, int num);
+void format_concat_output(framework::Tensor *out, int height, int width,
+                          int image_num, uint32_t *channel_num);
 
 }  // namespace fpga
 }  // namespace paddle_mobile
src/fpga/image.cpp
...
@@ -62,6 +62,10 @@ void format_image(float **data_in, int channel, int height, int width) {
   align_element_conv(data_in, height, channel * width);
 }
 
+void concat_images(int16_t **images_in, float **scales_in, void *image_out,
+                   float *scale_out, int image_num, uint32_t *channel_num,
+                   int height, int width) {}
+
 }  // namespace image
 }  // namespace fpga
 }  // namespace paddle_mobile
src/fpga/image.h
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
 
+#include <stdint.h>
+
+#define IMAGE_ALIGNMENT 16  // Aligned to 16
 
 namespace paddle_mobile {
 namespace fpga {
...
@@ -21,6 +24,10 @@ namespace image {
 void convert_to_hwc(float **data_in, int channel, int height, int width);
 void align_element_conv(float **data_in, int height, int cw);
 void format_image(float **data_in, int channel, int height, int width);
+void concat_images(int16_t **images_in, float **scales_in, void *image_out,
+                   float *scale_out, int image_num, uint32_t *channel_num,
+                   int height, int width);  // Concat featuremaps along channel direction
 
 }  // namespace image
 }  // namespace fpga
 }  // namespace paddle_mobile
src/framework/dim.h
...
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include <cstdlib>
 #include <string>
+#include "common/enforce.h"
 
 namespace paddle_mobile {
 namespace framework {
...
src/operators/feed_op.h
...
@@ -49,7 +49,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
   }
 
   void RunImpl() const {
-    Tensor *input = const_cast<Tensor *>(param_.InputX());
+    auto input = (Tensor *)const_cast<LoDTensor *>(param_.InputX());
     auto input_ptr = input->data<float>();
     fpga::format_image(input);
     Tensor *output = param_.Out();
...
src/operators/kernel/arm/dropout_kernel.cpp
...
@@ -27,7 +27,11 @@ bool DropoutKernel<CPU, float>::Init(DropoutParam<CPU> *para) {
 template <typename T>
 struct DropoutFunctor {
-  inline T operator()(T in) const { return in; }
+  DropoutFunctor(T drop_pro) : dropout_pro_(drop_pro) {}
+  inline T operator()(T in) const { return (1 - dropout_pro_) * in; }
+
+ private:
+  T dropout_pro_;
 };
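The functor now applies the standard inference-time dropout rescaling out = (1 - p) * in instead of passing values through unchanged; a minimal check of that behaviour:

#include <cassert>
#include <cmath>

// Mirrors DropoutFunctor<float>: scale each activation by (1 - dropout_prob).
float dropout_infer(float in, float dropout_prob) {
  return (1 - dropout_prob) * in;
}

int main() {
  assert(std::fabs(dropout_infer(2.0f, 0.5f) - 1.0f) < 1e-6f);  // p=0.5 halves
  assert(std::fabs(dropout_infer(3.0f, 0.0f) - 3.0f) < 1e-6f);  // p=0 is identity
  return 0;
}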
 
 template <>
...
@@ -36,8 +40,8 @@ void DropoutKernel<CPU, float>::Compute(const DropoutParam<CPU> &param) const {
   auto *input_x_ptr = input_x->data<float>();
   auto *out = param.Out();
   auto *out_ptr = out->mutable_data<float>();
-  DropoutFunctor<float> func_;
+  const float dropoutProb = param.DropoutProb();
+  DropoutFunctor<float> func_(dropoutProb);
   math::Transform trans;
   trans(input_x_ptr, input_x_ptr + input_x->numel(), out_ptr, func_);
 }
...
src/operators/kernel/fpga/concat_kernel.cpp
...
@@ -21,31 +21,44 @@ namespace operators {
 template <>
 bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) {
+  auto inputs = param->Inputs();
+  auto out = param->Out();
+  auto image_num = inputs.size();
+  auto images_in = (half **)fpga::fpga_malloc(image_num * sizeof(int *));
+  auto scales_in = (float **)fpga::fpga_malloc(image_num * sizeof(float *));
+  auto channel_num = (uint32_t *)fpga::fpga_malloc(image_num * sizeof(uint32_t));
+  auto height = inputs[0]->dims()[2];
+  auto width = inputs[0]->dims()[3];
+  for (int i = 0; i < image_num; i++) {
+    auto input = inputs[i];
+    PADDLE_MOBILE_ENFORCE(
+        input->dims()[2] == height && input->dims()[3] == width,
+        "Image height & width should be unified");
+    images_in[i] = (half *)input->data<float>();
+    channel_num[i] = (uint32_t)inputs[i]->dims()[1];
+    scales_in[i] = input->scale;
+  }
+  fpga::format_concat_output(out, (int)height, (int)width, (int)image_num,
+                             channel_num);
+
+  fpga::ConcatArgs concatArgs;
+  concatArgs.image_num = (uint32_t)image_num;
+  concatArgs.images_in = images_in;
+  concatArgs.scales_in = scales_in;
+  concatArgs.image_out = (half *)out->mutable_data<float>();
+  concatArgs.scale_out = out->scale;
+  concatArgs.channel_num = channel_num;
+  concatArgs.height = (uint32_t)height;
+  concatArgs.width = (uint32_t)width;
+  param->SetFpgaArgs(concatArgs);
   return true;
 }
 
 template <>
 void ConcatKernel<FPGA, float>::Compute(const ConcatParam<FPGA> &param) const {
-  auto inputs = param.Inputs();
-  auto *out = param.Out();
-  int64_t axis = param.Axis();
-  out->mutable_data<half>();
-  DDim out_dim = out->dims();
-  int pixels = out_dim[1] * out_dim[2];
-  auto out_channel = out_dim[3];
-  auto out_offset = 0;
-  for (int i = 0; i < inputs.size(); ++i) {
-    auto input = inputs[i];
-    auto channels = input->dims()[3];
-    out_offset += channels;
-    auto src = input->data<half>();
-    for (int j = 0; j < pixels; ++j) {
-      auto dst = out->mutable_data<half>() + out_offset;
-      memory::Copy(dst, src, sizeof(half));
-    }
-  }
+  ComputeFPGAConcat(param.FpgaArgs());
 }
 template class ConcatKernel<FPGA, float>;
...
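The net effect is that all shape checking and buffer bookkeeping for FPGA concat moves into Init, which packs per-input half pointers, scales, and channel counts into a ConcatArgs, while Compute collapses to a single ComputeFPGAConcat call in place of the element-by-element CPU copy loop removed above.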
src/operators/kernel/fpga/conv_add_bn_kernel.cpp
...
@@ -15,7 +15,6 @@ limitations under the License. */
 #ifdef FUSION_CONVADDBN_OP
 
 #include "operators/kernel/conv_add_bn_kernel.h"
-#include "fpga/api.h"
 
 namespace paddle_mobile {
 namespace operators {
...
@@ -23,13 +22,13 @@ namespace operators {
 template <>
 bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   bool relu_enabled = false;
-  Tensor *input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
-  const Tensor *bias = param->Bias();
+  auto bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  Tensor *filter = param->Filter();
+  auto filter = const_cast<Tensor *>(param->Filter());
-  Tensor *out = param->Output();
+  auto out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
...
@@ -41,10 +40,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
                         "Output channel should be equal to bias number");
   const int channel = out->dims()[1];
-  float *bs_ptr =
+  auto bs_ptr =
       reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
-  Tensor *new_scale = new Tensor();
-  Tensor *new_bias = new Tensor();
+  auto new_scale = new Tensor();
+  auto new_bias = new Tensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({channel});
   auto new_bias_ptr = new_bias->mutable_data<float>({channel});
...
@@ -70,27 +69,75 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(
+      convArgs.split_num * sizeof(fpga::ConvArgs));
+  convArgs.concat_arg.image_num = convArgs.split_num;
+  convArgs.concat_arg.image_out = out_ptr;
+  convArgs.concat_arg.scale_out = out->scale;
+  convArgs.concat_arg.height = (uint32_t)filter->dims()[2];
+  convArgs.concat_arg.width = (uint32_t)filter->dims()[3];
+
+  int n = convArgs.split_num;
+  convArgs.concat_arg.images_in = (half **)fpga::fpga_malloc(n * sizeof(int *));
+  convArgs.concat_arg.scales_in = (float **)fpga::fpga_malloc(n * sizeof(float *));
+  convArgs.concat_arg.channel_num =
+      (uint32_t *)fpga::fpga_malloc(n * sizeof(uint32_t));
+  convArgs.concat_arg.image_out = out_ptr;
+  param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  for (int i = 0; i < n; i++) {
+    convArgs.conv_args[i].relu_enabled = relu_enabled;
+    convArgs.conv_args[i].group_num = (uint32_t)param->Groups();
+    convArgs.conv_args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.conv_args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.conv_args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.conv_args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.conv_args[i].image.address = input_ptr;
+    convArgs.conv_args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.conv_args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.conv_args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.conv_args[i].image.scale_address = input->scale;
+    convArgs.conv_args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.conv_args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.conv_args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.conv_args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.conv_args[i].filter_num = (uint32_t)(
+        i == n - 1
+            ? fpga::get_aligned_filter_num(channel - (n - 1) * element_num_per_div)
+            : element_num_per_div);
+
+    if (n > 1) {
+      convArgs.conv_args[i].output.scale_address =
+          (float *)fpga::fpga_malloc(2 * sizeof(float));
+      convArgs.conv_args[i].output.address =
+          fpga::fpga_malloc(input->dims()[2] * input->dims()[3] *
+                            convArgs.conv_args[i].filter_num * sizeof(half));
+    } else {
+      convArgs.conv_args[i].output.scale_address = out->scale;
+      convArgs.conv_args[i].output.address = out_ptr;
+    }
+
+    convArgs.concat_arg.images_in[i] = (half *)convArgs.conv_args[i].output.address;
+    convArgs.concat_arg.scales_in[i] = (float *)convArgs.conv_args[i].sb_address;
+    convArgs.concat_arg.channel_num[i] = convArgs.conv_args[i].filter_num;
+  }
 
   return true;
 }
...
src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
...
@@ -23,12 +23,12 @@ template <>
 bool ConvAddBNReluKernel<FPGA, float>::Init(
     FusionConvAddBNReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  Tensor *input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  Tensor *filter = param->Filter();
-  Tensor *out = param->Output();
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
   auto bn_scale_ptr = param->InputScale()->data<float>();
...
@@ -39,9 +39,9 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
                         "Output channel should be equal to bias number");
   const int channel = out->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
-  Tensor *new_scale = new Tensor();
-  Tensor *new_bias = new Tensor();
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  auto new_scale = new Tensor();
+  auto new_bias = new Tensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({channel});
   auto new_bias_ptr = new_bias->mutable_data<float>({channel});
...
@@ -67,26 +67,45 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(
+      convArgs.split_num * sizeof(fpga::ConvArgs));
+  param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.conv_args[i].relu_enabled = relu_enabled;
+    convArgs.conv_args[i].group_num = (uint32_t)param->Groups();
+    convArgs.conv_args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.conv_args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.conv_args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.conv_args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.conv_args[i].image.address = input_ptr;
+    convArgs.conv_args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.conv_args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.conv_args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.conv_args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.conv_args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.conv_args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.conv_args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.conv_args[i].filter_num = (uint32_t)(
+        i == n - 1
+            ? fpga::get_aligned_filter_num(channel - (n - 1) * element_num_per_div)
+            : element_num_per_div);
+    convArgs.conv_args[i].output.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+    convArgs.conv_args[i].image.scale_address = input->scale;
+  }
   return true;
 }
...
src/operators/kernel/fpga/conv_add_relu_kernel.cpp
...
@@ -22,17 +22,17 @@ namespace operators {
 template <>
 bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  Tensor *input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  Tensor *filter = param->Filter();
-  Tensor *out = param->Output();
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
 
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i + channel] = 1;
     bs_ptr[i] = bias_ptr[i];
...
@@ -49,27 +49,44 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(
+      convArgs.split_num * sizeof(fpga::ConvArgs));
+  param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.conv_args[i].relu_enabled = relu_enabled;
+    convArgs.conv_args[i].group_num = (uint32_t)param->Groups();
+    convArgs.conv_args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.conv_args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.conv_args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.conv_args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.conv_args[i].image.address = input_ptr;
+    convArgs.conv_args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.conv_args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.conv_args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.conv_args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.conv_args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.conv_args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.conv_args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.conv_args[i].filter_num = (uint32_t)(
+        i == n - 1
+            ? fpga::get_aligned_filter_num(channel - (n - 1) * element_num_per_div)
+            : element_num_per_div);
+    convArgs.conv_args[i].output.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+    convArgs.conv_args[i].image.scale_address = input->scale;
+  }
   return true;
 }
...
src/operators/kernel/fpga/conv_bn_kernel.cpp
...
@@ -23,11 +23,10 @@ namespace operators {
 template <>
 bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   bool relu_enabled = false;
-  Tensor *input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
-  Tensor *filter = param->Filter();
-  Tensor *out = param->Output();
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
   auto bn_scale_ptr = param->InputScale()->data<float>();
...
@@ -37,10 +36,10 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
                         "Output channel should be equal to bias number");
   const int channel = out->dims()[1];
-  float *bs_ptr =
+  auto bs_ptr =
       reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
-  Tensor *new_scale = new Tensor();
-  Tensor *new_bias = new Tensor();
+  auto new_scale = new Tensor();
+  auto new_bias = new Tensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({channel});
   auto new_bias_ptr = new_bias->mutable_data<float>({channel});
...
@@ -65,27 +64,44 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(
+      convArgs.split_num * sizeof(fpga::ConvArgs));
+  param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.conv_args[i].relu_enabled = relu_enabled;
+    convArgs.conv_args[i].group_num = (uint32_t)param->Groups();
+    convArgs.conv_args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.conv_args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.conv_args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.conv_args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.conv_args[i].image.address = input_ptr;
+    convArgs.conv_args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.conv_args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.conv_args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.conv_args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.conv_args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.conv_args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.conv_args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.conv_args[i].filter_num = (uint32_t)(
+        i == n - 1
+            ? fpga::get_aligned_filter_num(channel - (n - 1) * element_num_per_div)
+            : element_num_per_div);
+    convArgs.conv_args[i].output.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+    convArgs.conv_args[i].image.scale_address = input->scale;
+  }
   return true;
 }
...
src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
...
@@ -22,10 +22,10 @@ namespace operators {
 template <>
 bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  Tensor *input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
-  Tensor *filter = param->Filter();
-  Tensor *out = param->Output();
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
   auto bn_scale_ptr = param->InputScale()->data<float>();
...
@@ -34,9 +34,9 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0],
                         "Output channel should be equal to bias number");
   const int channel = out->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
-  Tensor *new_scale = new Tensor();
-  Tensor *new_bias = new Tensor();
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  auto new_scale = new Tensor();
+  auto new_bias = new Tensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({channel});
   auto new_bias_ptr = new_bias->mutable_data<float>({channel});
...
@@ -61,26 +61,44 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(
+      convArgs.split_num * sizeof(fpga::ConvArgs));
+  param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.conv_args[i].relu_enabled = relu_enabled;
+    convArgs.conv_args[i].group_num = (uint32_t)param->Groups();
+    convArgs.conv_args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.conv_args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.conv_args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.conv_args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.conv_args[i].image.address = input_ptr;
+    convArgs.conv_args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.conv_args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.conv_args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.conv_args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.conv_args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.conv_args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.conv_args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.conv_args[i].filter_num = (uint32_t)(
+        i == n - 1
+            ? fpga::get_aligned_filter_num(channel - (n - 1) * element_num_per_div)
+            : element_num_per_div);
+    convArgs.conv_args[i].output.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+    convArgs.conv_args[i].image.scale_address = input->scale;
+  }
   return true;
 }
...
src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
...
@@ -22,9 +22,9 @@ template <>
 bool ElementwiseAddReluKernel<FPGA, float>::Init(
     ElementwiseAddReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  Tensor *input_x = const_cast<Tensor *>(param->InputX());
-  Tensor *input_y = const_cast<Tensor *>(param->InputY());
-  Tensor *out = param->Out();
+  auto *input_x = const_cast<LoDTensor *>(param->InputX());
+  auto *input_y = const_cast<LoDTensor *>(param->InputY());
+  auto *out = param->Out();
   auto input_x_ptr = input_x->data<float>();
   auto input_y_ptr = input_y->data<float>();
   fpga::format_ofm(out);
...
@@ -34,22 +34,22 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   ewaddArgs.relu_enabled = relu_enabled;
   ewaddArgs.const0 = 1;
   ewaddArgs.const1 = 1;
-  ewaddArgs.image0.address = (void *)input_x_ptr;
-  ewaddArgs.image0.channels = input_x->dims()[1];
+  ewaddArgs.image0.address = input_x_ptr;
+  ewaddArgs.image0.channels = (uint32_t)input_x->dims()[1];
   ewaddArgs.image0.scale_address = input_x->scale;
-  ewaddArgs.image0.height = input_x->dims()[2];
-  ewaddArgs.image0.width = input_x->dims()[3];
+  ewaddArgs.image0.height = (uint32_t)input_x->dims()[2];
+  ewaddArgs.image0.width = (uint32_t)input_x->dims()[3];
   ewaddArgs.image0.pad_height = 0;
   ewaddArgs.image0.pad_width = 0;
-  ewaddArgs.image1.address = (void *)input_y_ptr;
-  ewaddArgs.image1.channels = input_y->dims()[1];
+  ewaddArgs.image1.address = input_y_ptr;
+  ewaddArgs.image1.channels = (uint32_t)input_y->dims()[1];
   ewaddArgs.image1.scale_address = input_y->scale;
-  ewaddArgs.image1.height = input_y->dims()[2];
-  ewaddArgs.image1.width = input_y->dims()[3];
+  ewaddArgs.image1.height = (uint32_t)input_y->dims()[2];
+  ewaddArgs.image1.width = (uint32_t)input_y->dims()[3];
   ewaddArgs.image1.pad_height = 0;
   ewaddArgs.image1.pad_width = 0;
   ewaddArgs.output.scale_address = out->scale;
-  ewaddArgs.output.address = (void *)out_ptr;
+  ewaddArgs.output.address = out_ptr;
   param->SetFpgaArgs(ewaddArgs);
   return true;
 }
...
src/operators/kernel/fpga/fc_relu_kernel.cpp
...
@@ -14,71 +14,84 @@ limitations under the License. */
 #ifdef FUSION_FCRELU_OP
 
 #include "operators/kernel/fc_relu_kernel.h"
 #include "fpga/api.h"
 
 namespace paddle_mobile {
 namespace operators {
 
 template <>
 bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  Tensor *input_x = const_cast<Tensor *>(param->InputX());
+  auto input_x = const_cast<LoDTensor *>(param->InputX());
   auto input_x_ptr = input_x->data<float>();
-  Tensor *input_y = param->InputY();
-  const Tensor *input_z = param->InputZ();
+  auto filter = const_cast<Tensor *>(param->InputY());
+  auto input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
-  Tensor *out = param->Out();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
+  auto out = param->Out();
+  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
                         "Image channel should be equal to weight number");
-  int channel = out->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  int channel = (uint32_t)out->dims()[1];
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i + channel] = 1;
     bs_ptr[i] = input_z_ptr[i];
   }
-  int num = input_y->dims()[1];
-  int chw = input_y->dims()[0];
+  int num = (uint32_t)filter->dims()[1];
+  int chw = (uint32_t)filter->dims()[0];
   PADDLE_MOBILE_ENFORCE(chw == input_x->numel(),
                         "Filter element num should be equal to IFM element num");
-  int height = input_x->dims()[2];
-  int width = input_x->dims()[3];
+  int height = (uint32_t)input_x->dims()[2];
+  int width = (uint32_t)input_x->dims()[3];
   int filter_channel = chw / height / width;
-  input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
-  float max_value = fpga::filter_find_max(input_y);
-  fpga::format_filter(input_y, max_value, 1);
-  auto input_y_ptr = input_y->data<float>();
-  int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
+  filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
+  float max_value = fpga::filter_find_max(filter);
+  fpga::format_filter(filter, max_value, 1);
+  auto filter_ptr = filter->data<float>();
+  int element_num_per_div = fpga::get_element_num_per_div(filter, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)input_y_ptr;
-  convArgs.filter_num = out->dims()[1];
-  convArgs.group_num = 1;
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_w = 1;
-  convArgs.kernel.stride_h = 1;
-  convArgs.kernel.height = input_x->dims()[2];
-  convArgs.kernel.width = input_x->dims()[3];
-  convArgs.image.address = (void *)input_x_ptr;
-  convArgs.image.channels = input_x->dims()[1];
-  convArgs.image.height = input_x->dims()[2];
-  convArgs.image.width = input_x->dims()[3];
-  convArgs.image.pad_height = 0;
-  convArgs.image.pad_width = 0;
-  convArgs.image.scale_address = input_x->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = 1;
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(
+      convArgs.split_num * sizeof(fpga::ConvArgs));
+  param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.conv_args[i].relu_enabled = relu_enabled;
+    convArgs.conv_args[i].group_num = 1;
+    convArgs.conv_args[i].kernel.stride_h = 1;
+    convArgs.conv_args[i].kernel.stride_w = 1;
+    convArgs.conv_args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.conv_args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.conv_args[i].image.address = input_x_ptr;
+    convArgs.conv_args[i].image.channels = (uint32_t)input_x->dims()[1];
+    convArgs.conv_args[i].image.height = (uint32_t)input_x->dims()[2];
+    convArgs.conv_args[i].image.width = (uint32_t)input_x->dims()[3];
+    convArgs.conv_args[i].image.pad_height = 0;
+    convArgs.conv_args[i].image.pad_width = 0;
+    convArgs.conv_args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.conv_args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.conv_args[i].filter_num = (uint32_t)(
+        i == n - 1
+            ? fpga::get_aligned_filter_num(channel - (n - 1) * element_num_per_div)
+            : element_num_per_div);
+    convArgs.conv_args[i].output.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+    convArgs.conv_args[i].image.scale_address = input_x->scale;
+  }
  return true;
 }
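This kernel realises the fully connected layer as a convolution: the chw x num weight matrix is reshaped into a (num, chw/height/width, height, width) filter bank whose kernel spans the whole input feature map (kernel.height/width equal the input's, stride 1, no padding), so each output channel reduces to one dot product. With illustrative shapes, a 1x64x7x7 input (chw = 3136) and a 3136x10 weight matrix become a 10x64x7x7 filter producing a 1x10x1x1 output.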
 
 template <>
...
src/operators/kernel/fpga/fusion_fc_kernel.cpp
...
@@ -21,58 +21,78 @@ namespace operators {
 template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   bool relu_enabled = false;
-  Tensor *input_x = const_cast<Tensor *>(param->InputX());
+  auto input_x = const_cast<LoDTensor *>(param->InputX());
   auto input_x_ptr = input_x->data<float>();
-  Tensor *input_y = param->InputY();
+  auto filter = const_cast<Tensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
-  Tensor *out = param->Out();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
+  auto out = param->Out();
+  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
                         "Image channel should be equal to weight number");
-  int channel = out->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  int channel = (uint32_t)out->dims()[1];
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i + channel] = 1;
     bs_ptr[i] = input_z_ptr[i];
   }
-  int num = input_y->dims()[1];
-  int chw = input_y->dims()[0];
+  int num = (uint32_t)filter->dims()[1];
+  int chw = (uint32_t)filter->dims()[0];
   PADDLE_MOBILE_ENFORCE(chw == input_x->numel(),
                         "Filter element num should be equal to IFM element num");
-  int height = input_x->dims()[2];
-  int width = input_x->dims()[3];
+  int height = (uint32_t)input_x->dims()[2];
+  int width = (uint32_t)input_x->dims()[3];
   int filter_channel = chw / height / width;
-  input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
-  float max_value = fpga::filter_find_max(input_y);
-  fpga::format_filter(input_y, max_value, 1);
-  auto input_y_ptr = input_y->data<float>();
-  int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
+  filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
+  float max_value = fpga::filter_find_max(filter);
+  fpga::format_filter(filter, max_value, 1);
+  auto filter_ptr = filter->data<float>();
+  int element_num_per_div = fpga::get_element_num_per_div(filter, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)input_y_ptr;
-  convArgs.filter_num = out->dims()[1];
-  convArgs.group_num = 1;
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_w = 1;
-  convArgs.kernel.stride_h = 1;
-  convArgs.kernel.height = input_x->dims()[2];
-  convArgs.kernel.width = input_x->dims()[3];
-  convArgs.image.address = (void *)input_x_ptr;
-  convArgs.image.channels = input_x->dims()[1];
-  convArgs.image.height = input_x->dims()[2];
-  convArgs.image.width = input_x->dims()[3];
-  convArgs.image.pad_height = 0;
-  convArgs.image.pad_width = 0;
-  convArgs.image.scale_address = input_x->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = 1;
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(
+      convArgs.split_num * sizeof(fpga::ConvArgs));
+  param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.conv_args[i].relu_enabled = relu_enabled;
+    convArgs.conv_args[i].group_num = 1;
+    convArgs.conv_args[i].kernel.stride_h = 1;
+    convArgs.conv_args[i].kernel.stride_w = 1;
+    convArgs.conv_args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.conv_args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.conv_args[i].image.address = input_x_ptr;
+    convArgs.conv_args[i].image.channels = (uint32_t)input_x->dims()[1];
+    convArgs.conv_args[i].image.height = (uint32_t)input_x->dims()[2];
+    convArgs.conv_args[i].image.width = (uint32_t)input_x->dims()[3];
+    convArgs.conv_args[i].image.pad_height = 0;
+    convArgs.conv_args[i].image.pad_width = 0;
+    convArgs.conv_args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.conv_args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.conv_args[i].filter_num = (uint32_t)(
+        i == n - 1
+            ? fpga::get_aligned_filter_num(channel - (n - 1) * element_num_per_div)
+            : element_num_per_div);
+    convArgs.conv_args[i].output.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+    convArgs.conv_args[i].image.scale_address = input_x->scale;
+  }
   return true;
 }
...
src/operators/kernel/fpga/pool_kernel.cpp
浏览文件 @
71f88533
...
...
@@ -21,7 +21,7 @@ namespace operators {

 template <>
 bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
-  Tensor *input = const_cast<Tensor *>(param->Input());
+  auto *input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
   Tensor *output = param->Output();
   fpga::format_ofm(output);

@@ -31,19 +31,19 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
   vector<int> paddings = param->Paddings();

   fpga::PoolingArgs poolArgs;
-  poolArgs.image.address = (void *)input_ptr;
-  poolArgs.image.channels = input->dims()[1];
-  poolArgs.image.height = input->dims()[2];
-  poolArgs.image.width = input->dims()[3];
-  poolArgs.image.pad_height = paddings[0];
-  poolArgs.image.pad_width = paddings[1];
+  poolArgs.image.address = input_ptr;
+  poolArgs.image.channels = (uint32_t)input->dims()[1];
+  poolArgs.image.height = (uint32_t)input->dims()[2];
+  poolArgs.image.width = (uint32_t)input->dims()[3];
+  poolArgs.image.pad_height = (uint32_t)paddings[0];
+  poolArgs.image.pad_width = (uint32_t)paddings[1];
   poolArgs.image.scale_address = input->scale;
   poolArgs.output.address = output_ptr;
   poolArgs.output.scale_address = input->scale;
-  poolArgs.kernel.height = ksize[0];
-  poolArgs.kernel.width = ksize[1];
-  poolArgs.kernel.stride_h = strides[0];
-  poolArgs.kernel.stride_w = strides[1];
+  poolArgs.kernel.height = (uint32_t)ksize[0];
+  poolArgs.kernel.width = (uint32_t)ksize[1];
+  poolArgs.kernel.stride_h = (uint32_t)strides[0];
+  poolArgs.kernel.stride_w = (uint32_t)strides[1];
   param->SetFpgaArgs(poolArgs);
   return true;
 }
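Every change in this hunk is the same narrowing fix: dims() and the ksize/strides/paddings vectors yield wider signed integers, while the fpga::PoolingArgs descriptor fields appear to be uint32_t, so each assignment now narrows explicitly rather than implicitly. A small illustrative sketch of the pattern (the struct and values are stand-ins, not the real API):

  #include <cstdint>
  #include <vector>

  struct ImageStandIn {  // stand-in for the uint32_t fields of fpga::PoolingArgs
    uint32_t channels, height, width;
  };

  int main() {
    std::vector<int64_t> dims{1, 3, 112, 112};  // dims() yields 64-bit values
    ImageStandIn image;
    image.channels = (uint32_t)dims[1];  // explicit narrowing, as in the diff
    image.height = (uint32_t)dims[2];
    image.width = (uint32_t)dims[3];
    return 0;
  }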
src/operators/kernel/fpga/softmax_kernel.cpp
@@ -33,8 +33,8 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   args.convert_type = fpga::DATA_FP16_TO_FP32;
   args.layout_type = fpga::LAYOUT_NO_CONVERT;
   args.image.address = (void *)(input_ptr);
-  args.image.height = input->dims()[0];
-  args.image.width = input->dims()[1];
+  args.image.height = (uint32_t)input->dims()[0];
+  args.image.width = (uint32_t)input->dims()[1];
   args.image.channels = 1;
   args.output.address = output_ptr;
   param->SetFpgaArgs(args);
src/operators/math/gemm.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "operators/math/gemm.h"
-#include <string>
+#include <string.h>
 #include "common/log.h"
 #include "memory/t_malloc.h"
 #if __ARM_NEON

@@ -2985,6 +2985,8 @@ void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *new_scale,
 void WriteWithBnRelu(int mc, int nc, float *c, float *C, int ldc,
                      float *new_scale, float *new_bias) {}

+void WriteWithBnAddRelu(int mc, int nc, float *c, float *C, int ldc,
+                        float *new_scale, float *new_bias, float *bias1) {}
 #endif  // __ARM_NEON
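These are empty stubs that exist only so the non-NEON build links. Going by the naming convention of this function family (write the GEMM block c into C, applying the folded batch-norm scale/bias, an elementwise addend, then ReLU), a plausible scalar reference for WriteWithBnAddRelu could look like the sketch below; the buffer layout (row-major mc x nc staging block, bias1 shaped like C) is an assumption, not the library's actual NEON kernel:

  #include <algorithm>

  void WriteWithBnAddReluScalar(int mc, int nc, const float *c, float *C,
                                int ldc, const float *new_scale,
                                const float *new_bias, const float *bias1) {
    for (int i = 0; i < mc; ++i) {
      for (int j = 0; j < nc; ++j) {
        // batch-norm fold (per-row scale/bias), residual add, then ReLU
        float v = c[i * nc + j] * new_scale[i] + new_bias[i] + bias1[i * ldc + j];
        C[i * ldc + j] = std::max(v, 0.f);
      }
    }
  }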
src/operators/op_param.h
@@ -489,6 +489,15 @@ class ConcatParam : public OpParam {
   vector<GType *> inputs_;
   GType *out_;
   int axis_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::ConcatArgs fpga_concat_args;
+
+ public:
+  const fpga::ConcatArgs &FpgaArgs() const { return fpga_concat_args; }
+  void SetFpgaArgs(const fpga::ConcatArgs &args) { fpga_concat_args = args; }
+#endif
 };
 #endif
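This is the accessor pattern every FPGA-enabled param in this file follows: the kernel's Init() assembles the driver argument struct once at load time and caches it on the param, and Compute() only reads it back. A rough sketch of that flow, using just the two accessors added above (the free-function names are illustrative, not the kernel API):

  template <typename P>
  bool InitConcatOnFpga(ConcatParam<P> *param) {
    fpga::ConcatArgs args{};   // fill image/output descriptors once, at load time
    param->SetFpgaArgs(args);  // cached on the param object
    return true;
  }

  template <typename P>
  void ComputeConcatOnFpga(const ConcatParam<P> &param) {
    const fpga::ConcatArgs &args = param.FpgaArgs();  // no per-inference rebuild
    // hand args to the FPGA driver here
  }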
@@ -1238,11 +1247,7 @@ class FusionFcParam : public OpParam {
   }
   const GType *InputX() const { return input_x_; }

-#ifdef PADDLE_MOBILE_FPGA
-  RType *InputY() const { return input_y_; }
-#else
   const RType *InputY() const { return input_y_; }
-#endif

   const RType *InputZ() const { return input_z_; }
@@ -1265,11 +1270,11 @@ class FusionFcParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;

  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
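WrapperConvArgs is the new type this commit threads through all of the fused-conv params below. Its definition is not shown on this page, but the Init() code at the top of this section populates split_num, filter_num, output.address, output.scale_address, and a conv_args array, so a hedged reconstruction (the authoritative declaration lives in src/fpga/api.h) would be:

  #include <cstdint>

  namespace fpga {

  struct ImageOutputArgs {
    void *address;         // merged result buffer
    float *scale_address;  // quantization scale of the result
  };

  struct ConvArgs;  // per-split convolution descriptor, filled in a loop

  // Hypothetical sketch inferred from the fields used in this commit.
  struct WrapperConvArgs {
    uint32_t split_num;      // how many hardware convolutions the op splits into
    uint32_t filter_num;     // total number of output channels
    ImageOutputArgs output;  // where the combined result lands
    ConvArgs *conv_args;     // array of split_num descriptors (fpga_malloc'd)
  };

  }  // namespace fpga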
@@ -1303,11 +1308,7 @@ class FusionConvAddParam : public OpParam {
   const RType *Input() const { return input_; }

-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif

   RType *Output() const { return output_; }
@@ -1332,11 +1333,11 @@ class FusionConvAddParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;

  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
@@ -1385,11 +1386,7 @@ class FusionConvAddPReluParam : public OpParam {
   const RType *Input() const { return input_; }

-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif

   RType *Output() const { return output_; }
@@ -1416,11 +1413,11 @@ class FusionConvAddPReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;

  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -1467,11 +1464,7 @@ class FusionConvAddAddPReluParam : public OpParam {
   const RType *Input() const { return input_; }

-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif

   RType *Output() const { return output_; }
@@ -1502,11 +1495,11 @@ class FusionConvAddAddPReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;

  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -1544,11 +1537,7 @@ class FusionConvAddBNReluParam : public OpParam {
   const RType *Input() const { return input_; }

-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif

   RType *Output() const { return output_; }
@@ -1604,11 +1593,11 @@ class FusionConvAddBNReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;

  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -1654,11 +1643,7 @@ class FusionConvBNAddReluParam : public OpParam {
   const RType *Input() const { return input_; }

-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif

   RType *Output() const { return output_; }
@@ -1717,11 +1702,11 @@ class FusionConvBNAddReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;

  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -1754,11 +1739,8 @@ class FusionConvBNParam : public OpParam {
   const RType *Input() const { return input_; }

-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif

   RType *Output() const { return output_y_; }

   const vector<int> &Strides() const { return strides_; }
@@ -1811,11 +1793,11 @@ class FusionConvBNParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;

  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -1853,11 +1835,8 @@ class FusionConvAddBNParam : public OpParam {
   const RType *Input() const { return input_; }

-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif

   RType *Output() const { return output_y_; }

   const vector<int> &Strides() const { return strides_; }
@@ -1912,11 +1891,11 @@ class FusionConvAddBNParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;

  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -2033,11 +2012,7 @@ class FusionConvBNReluParam : public OpParam {
   const RType *Input() const { return input_; }

-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif

   RType *Output() const { return output_; }
@@ -2091,11 +2066,11 @@ class FusionConvBNReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;

  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -2147,15 +2122,20 @@ class DropoutParam : public OpParam {
                const AttributeMap &attrs, const Scope &scope) {
     input_x_ = InputXFrom<GType>(inputs, scope);
     out_ = OutFrom<GType>(outputs, scope);
+
+    dropout_prob_ = GetAttr<float>("dropout_prob", attrs);
   }

   const RType *InputX() const { return input_x_; }

   RType *Out() const { return out_; }

+  float DropoutProb() const { return dropout_prob_; }
+
  private:
   RType *input_x_;
   RType *out_;
+  float dropout_prob_;
 };
 #endif
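DropoutParam now carries the dropout_prob attribute, which is exactly what an inference-time kernel needs: with no random masking at test time, dropout reduces to scaling by 1 - p (the usual downgrade-in-infer convention). A sketch of how a CPU kernel could consume the new accessor; the actual implementation is in src/operators/kernel/arm/dropout_kernel.cpp and may differ:

  template <typename P>
  void DropoutCompute(const DropoutParam<P> &param) {
    const auto *input = param.InputX();
    auto *out = param.Out();
    const float *x = input->template data<float>();
    float *y = out->template mutable_data<float>();
    const float scale = 1.0f - param.DropoutProb();  // no masking at inference
    for (int64_t i = 0; i < input->numel(); ++i) {
      y[i] = x[i] * scale;
    }
  }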
test/CMakeLists.txt
@@ -208,6 +208,14 @@ else ()
     target_link_libraries(test-gru-op paddle-mobile)

+    # gen test
+    ADD_EXECUTABLE(test-inceptionv4 net/test_inceptionv4.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-inceptionv4 paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-alexnet net/test_alexnet.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-alexnet paddle-mobile)
+
     ADD_EXECUTABLE(test-googlenetv1 net/test_googlenetv1_combine.cpp test_helper.h test_include.h)
     target_link_libraries(test-googlenetv1 paddle-mobile)

@@ -215,10 +223,13 @@ else ()
     ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h)
     target_link_libraries(test-fssd paddle-mobile)

     #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
 endif ()

 # if(FPGA)
test/net/test_alexnet.cpp  0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"

int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  paddle_mobile.SetThreadNum(4);
  auto time1 = time();
  //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
  //  std::string(g_mobilenet_detect) + "/params", true);

  auto isok = paddle_mobile.Load(g_alexnet, true);
  if (isok) {
    auto time2 = time();
    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;

    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 224, 224};
    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);

    auto vec_result = paddle_mobile.Predict(input, dims);
    std::vector<float>::iterator biggest =
        std::max_element(std::begin(vec_result), std::end(vec_result));
    std::cout << " Max element is " << *biggest << " at position "
              << std::distance(std::begin(vec_result), biggest) << std::endl;

    // warm up with ten runs
    for (int i = 0; i < 10; ++i) {
      auto vec_result = paddle_mobile.Predict(input, dims);
    }
    auto time3 = time();
    for (int i = 0; i < 10; ++i) {
      auto vec_result = paddle_mobile.Predict(input, dims);
    }
    DLOG << vec_result;
    auto time4 = time();
    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
              << std::endl;
  }
  std::cout << "If the result is NaN, check whether "
               "test/images/g_test_image_1x3x224x224_banana exists."
            << std::endl;
  return 0;
}
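Both benchmarks lean on the time() / time_diff() helpers declared in test_helper.h, which this page does not show. A plausible chrono-based sketch of them (assumed, not copied from the header):

  #include <chrono>

  using TimePoint = std::chrono::high_resolution_clock::time_point;

  inline TimePoint time() { return std::chrono::high_resolution_clock::now(); }

  inline double time_diff(TimePoint t1, TimePoint t2) {
    // elapsed wall-clock time in milliseconds
    return std::chrono::duration<double, std::milli>(t2 - t1).count();
  }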
test/net/test_inceptionv4.cpp  0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"

int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  paddle_mobile.SetThreadNum(4);
  auto time1 = time();
  //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
  //  std::string(g_mobilenet_detect) + "/params", true);

  auto isok = paddle_mobile.Load(g_inceptionv4, true);
  if (isok) {
    auto time2 = time();
    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;

    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 224, 224};
    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);

    auto vec_result = paddle_mobile.Predict(input, dims);
    std::vector<float>::iterator biggest =
        std::max_element(std::begin(vec_result), std::end(vec_result));
    std::cout << " Max element is " << *biggest << " at position "
              << std::distance(std::begin(vec_result), biggest) << std::endl;

    // warm up with ten runs
    for (int i = 0; i < 10; ++i) {
      auto vec_result = paddle_mobile.Predict(input, dims);
    }
    auto time3 = time();
    for (int i = 0; i < 10; ++i) {
      auto vec_result = paddle_mobile.Predict(input, dims);
    }
    // DLOG << vec_result;
    auto time4 = time();
    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
              << std::endl;
  }
  std::cout << "If the result is NaN, check whether "
               "test/images/g_test_image_1x3x224x224_banana exists."
            << std::endl;
  return 0;
}
test/test_helper.h
@@ -34,6 +34,8 @@ static const char *g_mobilenet_detect = "../models/mobilenet-detect";
 static const char *g_squeezenet = "../models/squeezenet";
 static const char *g_googlenet = "../models/googlenet";
 static const char *g_mobilenet = "../models/mobilenet";
+static const char *g_alexnet = "../models/alexnet";
+static const char *g_inceptionv4 = "../models/inceptionv4";
 static const char *g_nlp = "../models/nlp";
 static const char *g_resnet_50 = "../models/resnet_50";
 static const char *g_resnet = "../models/resnet";