PaddlePaddle / Paddle-Lite

Commit 4fb30240
Authored Mar 13, 2019 by hjchen2

update

Parent: 2bbf3ec6

Showing 25 changed files with 158 additions and 185 deletions (+158 -185)
src/common/log.h  +2 -1
src/framework/executor.cpp  +0 -14
src/framework/executor.h  +0 -1
src/io/api_paddle_mobile.cc  +1 -1
src/io/jni/paddle_mobile_jni.cpp  +0 -2
src/io/paddle_mobile.h  +0 -1
src/operators/fusion_deconv_add_bn_op.h  +1 -1
src/operators/fusion_deconv_add_bn_relu_op.h  +1 -1
src/operators/fusion_deconv_bn_relu_op.h  +1 -1
src/operators/kernel/central-arm-func/conv_arm_func.cpp  +2 -0
src/operators/kernel/fpga/V1/conv_kernel.cpp  +2 -2
src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp  +2 -2
src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp  +2 -2
src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp  +2 -2
src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp  +2 -2
src/operators/kernel/fpga/V1/fetch_kernel.cpp  +9 -13
src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp  +1 -1
src/operators/kernel/fpga/V1/pad2d_kernel.cpp  +5 -5
src/operators/kernel/fpga/V1/pool_kernel.cpp  +1 -1
src/operators/kernel/fpga/V1/sigmoid_kernel.cpp  +1 -1
src/operators/math/channel_wise.h  +2 -2
src/operators/math/gemm/cblas.cc  +4 -0
src/operators/pad2d_op.cpp  +3 -0
test/fpga/test_rfcn_api.cpp  +106 -125
test/net/test_benchmark.cpp  +8 -4
src/common/log.h
@@ -31,7 +31,8 @@ namespace paddle_mobile {
 #ifdef ANDROID
-extern const char *ANDROID_LOG_TAG;
+static const char *ANDROID_LOG_TAG =
+    "paddle_mobile LOG built on " __DATE__ " " __TIME__;
 #define ANDROIDLOGI(...) \
   __android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, __VA_ARGS__); \
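A side effect of this hunk worth noting: with static, every translation unit that includes log.h now gets its own internal-linkage copy of the tag, so the out-of-line definition deleted from the JNI file below is no longer required. A minimal standalone sketch of the behavior (the main harness is mine):

#include <cstdio>

// Internal-linkage tag, initialized with the including unit's build timestamp.
static const char *ANDROID_LOG_TAG =
    "paddle_mobile LOG built on " __DATE__ " " __TIME__;

int main() {
  std::printf("%s\n", ANDROID_LOG_TAG);  // e.g. "... built on Mar 13 2019 ..."
  return 0;
}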
src/framework/executor.cpp
@@ -531,20 +531,6 @@ void Executor<Device, T>::FeedData(const std::vector<void *> &v) {
   }
 }
 
-template <typename Device, typename T>
-void Executor<Device, T>::FeedTensorData(const vector<framework::Tensor> &v) {
-  auto input_size = v.size();
-  int index = 0;
-  auto vars = program_.scope->VarContain("feed", &index);
-  PADDLE_MOBILE_ENFORCE(input_size == vars.size(),
-                        "input data number not correct");
-  for (int i = 0; i < input_size; i++) {
-    auto var = program_.scope->Var("feed", i + index);
-    auto feed_tensor = var->template GetMutable<LoDTensor>();
-    feed_tensor->ShareDataWith(v[i]);
-  }
-}
-
 template <typename Device, typename T>
 void Executor<Device, T>::GetResults(std::vector<void *> *v) {
   auto output_size = v->size();
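The deleted FeedTensorData shared caller-owned tensor storage into the executor's "feed" variables instead of copying it. A schematic of that zero-copy pattern, with stand-in types rather than the repo's Tensor:

#include <cassert>
#include <memory>
#include <vector>

// Schematic only: the feed slot aliases the caller's buffer, as the
// removed helper did via ShareDataWith.
struct Tensor {
  std::shared_ptr<std::vector<float>> buf;
  void ShareDataWith(const Tensor &other) { buf = other.buf; }  // alias, no copy
};

int main() {
  Tensor input;
  input.buf = std::make_shared<std::vector<float>>(16, 1.0f);
  Tensor feed_slot;
  feed_slot.ShareDataWith(input);        // same storage from here on
  (*input.buf)[0] = 42.0f;
  assert((*feed_slot.buf)[0] == 42.0f);  // change is visible through the slot
  return 0;
}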
src/framework/executor.h
@@ -53,7 +53,6 @@ class Executor {
   void InjectVariable(const Tensor &t, std::string var_name);
   void FeedData(const Tensor &t);
   void FeedData(const std::vector<void *> &v);
-  void FeedTensorData(const std::vector<framework::Tensor> &v);
   void GetResults(std::vector<void *> *v);
   void GetTensorResults(std::vector<framework::Tensor *> *v);
src/io/api_paddle_mobile.cc
@@ -146,7 +146,7 @@ void PaddleMobilePredictor<Device, T>::FeedPaddleTensors(
     tensors[i].init(typeid(float));
     ConvertPaddleTensors(inputs[i], &tensors[i]);
   }
-  paddle_mobile_->FeedTensorData(tensors);
+  //  paddle_mobile_->FeedTensorData(tensors);
 }
 
 template <typename Device, typename T>
src/io/jni/paddle_mobile_jni.cpp
@@ -39,8 +39,6 @@ using framework::Tensor;
 using paddle_mobile::CPU;
 using std::string;
 
-const char *ANDROID_LOG_TAG =
-    "paddle_mobile LOG built on " __DATE__ " " __TIME__;
 paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
 static std::mutex shared_mutex;
src/io/paddle_mobile.h
@@ -91,7 +91,6 @@ class PaddleMobile {
   void InjectVariable(const framework::Tensor &t, std::string var_name);
   void FeedData(const framework::Tensor &t);
   void FeedData(const std::vector<void *> &v);
-  void FeedTensorData(const std::vector<framework::Tensor> &v);
   void GetResults(std::vector<void *> *v);
   void GetTensorResults(std::vector<framework::Tensor *> *v);
src/operators/fusion_deconv_add_bn_op.h
@@ -57,7 +57,7 @@ class FusionDeconvAddBNOp : public framework::OperatorWithKernel<
   FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs,
                       const VariableNameMap &outputs,
                       const framework::AttributeMap &attrs,
-                      std::shared_ptr<framework::Scope> scope)
+                      framework::Scope *scope)
       : framework::OperatorWithKernel<
             DeviceType, FusionDeconvAddBNParam<DeviceType>,
             operators::DeconvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
src/operators/fusion_deconv_add_bn_relu_op.h
@@ -59,7 +59,7 @@ class FusionDeconvAddBNReluOp
   FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs,
                           const VariableNameMap &outputs,
                           const framework::AttributeMap &attrs,
-                          std::shared_ptr<framework::Scope> scope)
+                          framework::Scope *scope)
       : framework::OperatorWithKernel<
             DeviceType, FusionDeconvAddBNReluParam<DeviceType>,
             operators::DeconvAddBNReluKernel<DeviceType, T>>(
src/operators/fusion_deconv_bn_relu_op.h
@@ -56,7 +56,7 @@ class FusionDeconvBNReluOp
   FusionDeconvBNReluOp(const string &type, const VariableNameMap &inputs,
                        const VariableNameMap &outputs,
                        const framework::AttributeMap &attrs,
-                       std::shared_ptr<framework::Scope> scope)
+                       framework::Scope *scope)
       : framework::OperatorWithKernel<
             DeviceType, FusionDeconvBNReluParam<DeviceType>,
             operators::DeconvBNReluKernel<DeviceType, T>>(type, inputs,
                                                           outputs,
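The same one-line signature change lands in all three fusion deconv headers above: the scope argument becomes a raw, non-owning pointer. A reduced sketch of the before/after ownership semantics (class and member names are stand-ins, not the repo's types):

#include <memory>

struct Scope {};

// Before: the operator shared ownership of the scope (refcount bump per op).
struct OpBefore {
  explicit OpBefore(std::shared_ptr<Scope> scope) : scope_(std::move(scope)) {}
  std::shared_ptr<Scope> scope_;
};

// After: the operator borrows the scope; whoever built the program keeps it
// alive for the operator's lifetime.
struct OpAfter {
  explicit OpAfter(Scope *scope) : scope_(scope) {}
  Scope *scope_;  // non-owning
};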
src/operators/kernel/central-arm-func/conv_arm_func.cpp
@@ -47,6 +47,7 @@ bool IsExpand(const std::vector<int64_t> &filter_dim,
   return !(filter_1 && strides_1 && padding_0 && dilation_1);
 }
 
+#ifdef PADDLE_MOBILE_CPU
 template <typename Itype, typename Otype>
 void GemmConv(const ConvParam<CPU> &param) {
   const Tensor *input = param.Input();
@@ -241,6 +242,7 @@ template void GemmConv<int8_t, int32_t>(const ConvParam<CPU> &param);
 template void DepthwiseConv3x3<int8_t, int32_t>(const ConvParam<CPU> &param);
 template void DepthwiseConv5x5<int8_t, int32_t>(const ConvParam<CPU> &param);
 #endif
+#endif
 
 }  // namespace operators
 }  // namespace paddle_mobile
src/operators/kernel/fpga/V1/conv_kernel.cpp
@@ -24,8 +24,8 @@ bool ConvKernel<FPGA, float>::Init(ConvParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::NONE;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto input = const_cast<LoDTensor *>(param->Input());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   int channel = out->dims()[1];
   auto bs_ptr =
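This Tensor to LoDTensor fix repeats across the FPGA V1 kernels below; presumably the param accessors now return const LoDTensor *, and const_cast can only strip qualifiers, not change the pointee type. A self-contained illustration (types are stand-ins):

struct Tensor {};
struct LoDTensor : Tensor {};

// What the param accessor is assumed to return after this commit.
const LoDTensor *Input() {
  static LoDTensor t;
  return &t;
}

int main() {
  // const_cast may only strip cv-qualifiers; the pointee type must match.
  auto input = const_cast<LoDTensor *>(Input());  // OK
  // auto bad = const_cast<Tensor *>(Input());    // ill-formed: changes type
  (void)input;
  return 0;
}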
src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp
@@ -27,10 +27,10 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::NONE;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   //  const Tensor *bias = param->Bias();
   //  auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   //  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp
@@ -27,10 +27,10 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::NONE;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   const Tensor *bias = param->InputBias();
   auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp
@@ -28,10 +28,10 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::LEAKYRELU;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   const Tensor *bias = param->InputBias();
   auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp
@@ -29,10 +29,10 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::LEAKYRELU;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   const Tensor *bias = param->InputBias();
   auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
src/operators/kernel/fpga/V1/fetch_kernel.cpp
@@ -57,13 +57,9 @@ void dealign(float *src, float *dst, int input_c, int input_h, int input_w) {
 }
 template <>
 void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
-  auto input = const_cast<Tensor *>(param.InputX());
-  if (input->type() == typeid(float)) {
-    int col = param.Col();
-    auto output = &(param.Out()->at(col));
-    output->ShareDataWith(*input);
-    return;
-  }
+  auto input = const_cast<LoDTensor *>(param.InputX());
+  int col = param.Col();
+  LoDTensor *out = &param.Out()->at(col);
 
   fpga::BypassArgs args = param.fpga_bypass_args;
   auto input_address = (input->data<half>());
@@ -71,7 +67,7 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   float *outdata_ptr =
       reinterpret_cast<float *>(param.fpga_bypass_args.output.address);
   const int num_th = 32;
-  if ((param.Out()->fpga_data_num) < num_th) {
+  if ((out->fpga_data_num) < num_th) {
     fpga::fpga_invalidate(input_address, (input->fpga_data_num) * sizeof(half));
 
     for (int idx = 0; idx < product(input->dims()); ++idx) {
@@ -81,14 +77,14 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   }
 
   fpga::PerformBypass(args);
-  auto outC = param.Out()->dims()[1];
-  auto outH = param.Out()->dims()[2];
-  auto outW = param.Out()->dims()[3];
+  auto outC = out->dims()[1];
+  auto outH = out->dims()[2];
+  auto outW = out->dims()[3];
 
   fpga::fpga_invalidate(param.fpga_bypass_args.output.address,
-                        param.Out()->fpga_data_num * sizeof(float));
+                        out->fpga_data_num * sizeof(float));
 
-  if (param.Out()->fpga_data_num != product(input->dims())) {
+  if (out->fpga_data_num != product(input->dims())) {
     float *data_tmp =
         reinterpret_cast<float *>(malloc(outC * outH * outW * sizeof(float)));
     dealign(outdata_ptr, data_tmp, outC, outH, outW);
src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp
@@ -25,7 +25,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
       paddle_mobile::fpga::LEAKYRELU;
   int16_t leaky_relu_negative_slope = 0;
   auto input_x = const_cast<LoDTensor *>(param->InputX());
-  auto filter = const_cast<Tensor *>(param->InputY());
+  auto filter = const_cast<LoDTensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   auto out = param->Out();
src/operators/kernel/fpga/V1/pad2d_kernel.cpp
@@ -16,8 +16,8 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 template <>
-bool Pad2dKernel<FPGA, float>::Init(Pad2dParam<FPGA> *param) {
-  Tensor *output = param->Out();
+bool Pad2DKernel<FPGA, float>::Init(Pad2DParam<FPGA> *param) {
+  Tensor *output = param->output_;
   fpga::format_fp16_ofm(output);
   return true;
 }
@@ -39,9 +39,9 @@ void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) {
   }
 }
 template <>
-void Pad2dKernel<FPGA, float>::Compute(const Pad2dParam<FPGA> &param) {
-  auto in_x = param.InputX();
-  auto out = param.Out();
+void Pad2DKernel<FPGA, float>::Compute(const Pad2DParam<FPGA> &param) {
+  auto in_x = param.input_;
+  auto out = param.output_;
   fpga::fpga_invalidate((void *)in_x->data<half>(),  // NOLINT
                         in_x->numel() * sizeof(half));
   pad2dFunc(in_x, out);
src/operators/kernel/fpga/V1/pool_kernel.cpp
@@ -68,7 +68,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
 template <>
 void PoolKernel<FPGA, float>::Compute(const PoolParam<FPGA> &param) {
-  auto *input = const_cast<Tensor *>(param.Input());
+  auto *input = const_cast<LoDTensor *>(param.Input());
   if (input->type() == typeid(float)) {
     auto *output = param.Output();
src/operators/kernel/fpga/V1/sigmoid_kernel.cpp
@@ -24,7 +24,7 @@ bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::SIGMOID;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->InputX());
+  auto input = const_cast<LoDTensor *>(param->InputX());
   auto input_ptr = input->data<half>();
   auto out = param->Out();
   fpga::format_fp16_ofm(out);
src/operators/math/channel_wise.h
@@ -33,7 +33,7 @@ void AddChannelWise(const framework::Tensor *input,
   // maybe check shape
   int batch_size = input->dims()[0];
   int channels = input->dims()[1];
-  size_t spatial_size = input->dims()[2] * input->dims()[3];
+  int spatial_size = input->dims()[2] * input->dims()[3];
 
   for (int batch = 0; batch < batch_size; ++batch) {
     for (int channel = 0; channel < channels; ++channel) {
@@ -88,7 +88,7 @@ void ScaleAddChannelWise(const framework::Tensor *input,
   // maybe check shape
   int batch_size = input->dims()[0];
   int channels = input->dims()[1];
-  size_t spatial_size = input->dims()[2] * input->dims()[3];
+  int spatial_size = input->dims()[2] * input->dims()[3];
 
   for (int batch = 0; batch < batch_size; ++batch) {
     for (int channel = 0; channel < channels; ++channel) {
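Making spatial_size an int keeps it the same signedness as the loop counters around it; mixing size_t with int promotes comparisons and arithmetic to unsigned, which can silently flip results. An illustration of the hazard (my example, not code from the repo):

#include <cstddef>
#include <iostream>

int main() {
  std::size_t spatial_size = 4;
  int offset = -1;
  // offset converts to a huge unsigned value, so the comparison inverts:
  std::cout << (offset < spatial_size) << "\n";  // prints 0
  int spatial_size_signed = 4;
  std::cout << (offset < spatial_size_signed) << "\n";  // prints 1
  return 0;
}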
src/operators/math/gemm/cblas.cc
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+
 #pragma once
 
 #include "operators/math/gemm/cblas.h"
@@ -47,3 +49,5 @@ void cblas_sgemv(const bool trans, const int M, const int N, const float alpha,
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
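Bracketing the whole file in the NEON guard means it compiles to an empty translation unit on targets without NEON, so it can stay in the build list unconditionally. A minimal sketch of the same pattern (the sum4 kernel is illustrative, not from the repo):

// NEON-only source: contributes nothing when the target lacks NEON.
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include <arm_neon.h>

float sum4(const float *p) {
  float32x4_t v = vld1q_f32(p);  // load 4 floats
  float32x2_t s = vadd_f32(vget_low_f32(v), vget_high_f32(v));
  return vget_lane_f32(vpadd_f32(s, s), 0);  // horizontal sum
}

#endif  // no NEON: empty translation unit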
src/operators/pad2d_op.cpp
@@ -37,5 +37,8 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(pad2d, ops::Pad2DOp);
 #endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(pad2d, ops::Pad2DOp);
+#endif
 
 #endif  // PAD2D_OP
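The new lines register the same pad2d op for FPGA builds. Schematically, the REGISTER_OPERATOR_* macros plant a static registrar that maps the op name to a factory; a toy version of the mechanism (not the actual macro expansion):

#include <functional>
#include <iostream>
#include <map>
#include <string>

// Toy registry: op name -> factory, filled at static-init time.
std::map<std::string, std::function<void()>> g_ops;

struct Registrar {
  Registrar(const std::string &name, std::function<void()> f) {
    g_ops[name] = std::move(f);
  }
};

#define TOY_REGISTER_OPERATOR(name, fn) \
  static Registrar g_reg_##name(#name, fn)

TOY_REGISTER_OPERATOR(pad2d, [] { std::cout << "run pad2d\n"; });

int main() {
  g_ops["pad2d"]();  // dispatch by name, as the framework does
  return 0;
}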
test/fpga/test_rfcn_api.cpp
@@ -12,16 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#ifndef PADDLE_MOBILE_FPGA
-#define PADDLE_MOBILE_FPGA
-#endif
-#include <fstream>
-#include <iostream>
-#include "io/paddle_inference_api.h"
+#include "../test_helper.h"
+#include "../test_include.h"
+#ifdef PADDLE_MOBILE_FPGA_V1
+#include "fpga/V1/api.h"
+#endif
+#ifdef PADDLE_MOBILE_FPGA_V2
+#include "fpga/V2/api.h"
+#endif
 
-static const char *g_image = "../models/rfcn/data.bin";
-static const char *g_model = "../models/rfcn/model";
-static const char *g_param = "../models/rfcn/params";
+#include <string>
 
 void readStream(std::string filename, char *buf) {
   std::ifstream in;
@@ -35,137 +37,116 @@ void readStream(std::string filename, char *buf) {
   auto length = in.tellg();    // report location (this is the length)
   in.seekg(0, std::ios::beg);  // go back to the beginning
   in.read(buf, length);
   DLOG << length;
   in.close();
 }
 
-PaddleMobileConfig GetConfig() {
-  PaddleMobileConfig config;
-  config.precision = PaddleMobileConfig::FP32;
-  config.device = PaddleMobileConfig::kFPGA;
-  config.prog_file = g_model;
-  config.param_file = g_param;
-  config.thread_num = 1;
-  config.batch_size = 1;
-  config.optimize = true;
-  config.lod_mode = true;
-  config.quantification = false;
-  return config;
-}
-
-PaddleMobileConfig GetConfig1() {
-  PaddleMobileConfig config;
-  config.precision = PaddleMobileConfig::FP32;
-  config.device = PaddleMobileConfig::kFPGA;
-  config.model_dir = "../models/resnet50";
-  config.thread_num = 1;
-  config.batch_size = 1;
-  config.optimize = true;
-  config.quantification = false;
-  return config;
-}
-
-int main() {
-  open_device();
-  PaddleMobileConfig config = GetConfig();
-  auto predictor =
-      CreatePaddlePredictor<PaddleMobileConfig,
-                            PaddleEngineKind::kPaddleMobile>(config);
-  std::cout << "Finishing loading model" << std::endl;
-  float img_info[3] = {432, 1280, 1.0f};
-  int img_length = 432 * 1280 * 3;
-  auto img = reinterpret_cast<float *>(fpga_malloc(img_length * sizeof(float)));
-  readStream(g_image, reinterpret_cast<char *>(img));
-  std::cout << "Finishing initializing data" << std::endl;
-  struct PaddleTensor t_img_info, t_img;
-  t_img.dtypeid = typeid(float);
-  t_img_info.layout = LAYOUT_HWC;
-  t_img_info.shape = std::vector<int>({1, 3});
-  t_img_info.name = "Image information";
-  t_img_info.data.Reset(img_info, 3 * sizeof(float));
-  t_img.dtypeid = typeid(float);
-  t_img.layout = LAYOUT_HWC;
-  t_img.shape = std::vector<int>({1, 432, 1280, 3});
-  t_img.name = "Image information";
-  t_img.data.Reset(img, img_length * sizeof(float));
-  predictor->FeedPaddleTensors({t_img_info, t_img});
-  std::cout << "Finishing feeding data " << std::endl;
-  predictor->Predict_From_To(0, -1);
-  std::cout << "Finishing predicting " << std::endl;
-  std::vector<PaddleTensor> v;        // No need to initialize v
-  predictor->FetchPaddleTensors(&v);  // Old data in v will be cleared
-  std::cout << "Output number is " << v.size() << std::endl;
-  std::cout << "out[0] length " << v[0].data.length() << std::endl;
-  std::cout << "out[1] length " << v[1].data.length() << std::endl;
-  std::cout << "out[2] length " << v[2].data.length() << std::endl;
-  auto post_nms = v[0].data.length() / sizeof(float) / 8;
-  for (int num = 0; num < post_nms; num++) {
-    for (int i = 0; i < 8; i++) {
-      auto p = reinterpret_cast<float *>(v[0].data.data());
-      std::cout << p[num * 8 + i] << std::endl;
-    }
-  }
-  for (int num = 0; num < post_nms; num++) {
-    for (int i = 0; i < 8; i++) {
-      auto p = reinterpret_cast<float *>(v[1].data.data());
-      std::cout << p[num * 8 + i] << std::endl;
-    }
-  }
-  for (int num = 0; num < post_nms; num++) {
-    for (int i = 0; i < 4; i++) {
-      auto p = reinterpret_cast<float *>(v[2].data.data());
-      std::cout << p[num * 4 + i] << std::endl;
-    }
-  }
-  std::cout << "Finish getting vector values" << std::endl;
-  ////////////////////////////////////////////////////
-  PaddleTensor tensor;
-  predictor->GetPaddleTensor("fetch2", &tensor);
-  for (int i = 0; i < post_nms; i++) {
-    auto p = reinterpret_cast<float *>(tensor.data.data());
-    std::cout << p[+i] << std::endl;
-  }
-  //////////////////////////////////////////////////////
-  PaddleMobileConfig config1 = GetConfig1();
-  auto predictor1 =
-      CreatePaddlePredictor<PaddleMobileConfig,
-                            PaddleEngineKind::kPaddleMobile>(config1);
-  std::cout << "Finishing loading model" << std::endl;
-  int img_length1 = 224 * 224 * 3;
-  auto img1 =
-      reinterpret_cast<float *>(fpga_malloc(img_length1 * sizeof(float)));
-  std::cout << "Finishing initializing data" << std::endl;
-  struct PaddleTensor t_img1;
-  t_img1.dtypeid = typeid(float);
-  t_img1.layout = LAYOUT_HWC;
-  t_img1.shape = std::vector<int>({1, 224, 224, 3});
-  t_img1.name = "Image information";
-  t_img1.data.Reset(img1, img_length1 * sizeof(float));
-  predictor1->FeedPaddleTensors({t_img1});
-  predictor1->Predict_From_To(0, -1);
-  std::cout << "Finishing predicting " << std::endl;
-  std::vector<PaddleTensor> v1;         // No need to initialize v
-  predictor1->FetchPaddleTensors(&v1);  // Old data in v will be cleared
-  std::cout << "Output number is " << v1.size() << std::endl;
-  std::cout << "out[0] length " << v1[0].data.length() << std::endl;
-  return 0;
-}
+void convert_to_chw(int16_t **data_in, int channel, int height, int width,
+                    int num, int16_t *data_tmp) {
+  int64_t amount_per_side = width * height;
+  for (int n = 0; n < num; n++) {
+    for (int h = 0; h < height; h++) {
+      for (int w = 0; w < width; w++) {
+        for (int c = 0; c < channel; c++) {
+          *(data_tmp + n * amount_per_side * channel + c * amount_per_side +
+            width * h + w) = *((*data_in)++);
+        }
+      }
+    }
+  }
+}
+
+void dump_stride_half(std::string filename, Tensor input_tensor,
+                      const int dumpnum, bool use_chw) {
+  // bool use_chw = true;
+  if (input_tensor.dims().size() != 4) return;
+  int c = (input_tensor.dims())[1];
+  int h = (input_tensor.dims())[2];
+  int w = (input_tensor.dims())[3];
+  int n = (input_tensor.dims())[0];
+  auto data_ptr = input_tensor.get_data();
+  auto *data_ptr_16 = reinterpret_cast<half *>(data_ptr);
+  auto data_tmp = data_ptr_16;
+  if (use_chw) {
+    data_tmp =
+        reinterpret_cast<half *>(malloc(n * c * h * w * sizeof(int16_t)));
+    convert_to_chw(&data_ptr_16, c, h, w, n, data_tmp);
+  }
+  std::ofstream out(filename.c_str());
+  float result = 0;
+  int stride = input_tensor.numel() / dumpnum;
+  stride = stride > 0 ? stride : 1;
+  for (int i = 0; i < input_tensor.numel(); i += stride) {
+    result = paddle_mobile::fpga::fp16_2_fp32(data_tmp[i]);
+    out << result << std::endl;
+  }
+  out.close();
+  if (data_tmp != data_ptr_16) {
+    free(data_tmp);
+  }
+}
+
+void dump_stride_float(std::string filename, Tensor input_tensor,
+                       const int dumpnum) {
+  auto data_ptr = reinterpret_cast<float *>(input_tensor.get_data());
+  std::ofstream out(filename.c_str());
+  float result = 0;
+  int stride = input_tensor.numel() / dumpnum;
+  stride = stride > 0 ? stride : 1;
+  for (int i = 0; i < input_tensor.numel(); i += stride) {
+    result = data_ptr[i];
+    out << result << std::endl;
+  }
+  out.close();
+}
+
+void dump_stride(std::string filename, Tensor input_tensor, const int dumpnum,
+                 bool use_chw) {
+  static int i = 0;
+  if (input_tensor.numel() == 0) {
+    return;
+  }
+  if (input_tensor.type() == typeid(float)) {
+    DLOG << "op: " << i++ << ", float data " << input_tensor.numel();
+    dump_stride_float(filename, input_tensor, dumpnum);
+  } else {
+    DLOG << "op: " << i++ << ", half data " << input_tensor.numel();
+    dump_stride_half(filename, input_tensor, dumpnum, use_chw);
+  }
+  DLOG << "dump input address: " << input_tensor.get_data();
+}
+
+static const char *g_rfcn_combine = "../models/rfcn";
+static const char *g_image_src_float = "../models/rfcn/data.bin";
+int main() {
+  paddle_mobile::fpga::open_device();
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+  if (paddle_mobile.Load(std::string(g_rfcn_combine) + "/model",
+                         std::string(g_rfcn_combine) + "/params", true, false,
+                         1, true)) {
+    float img_info[3] = {768, 1536, 768.0f / 960.0f};
+    auto img = reinterpret_cast<float *>(
+        fpga::fpga_malloc(768 * 1536 * 3 * sizeof(float)));
+    readStream(g_image_src_float, reinterpret_cast<char *>(img));
+
+    std::vector<void *> v(3, nullptr);
+    paddle_mobile.FeedData(std::vector<void *>({img_info, img}));
+    paddle_mobile.Predict_To(-1);
+    for (int i = 65; i < 69; i++) {
+      auto tensor_ptr = paddle_mobile.FetchResult(i);
+      std::string saveName = "rfcn_" + std::to_string(i);
+      paddle_mobile::fpga::fpga_invalidate((*tensor_ptr).get_data(),
+                                           tensor_ptr->numel() * sizeof(float));
+      dump_stride(saveName, (*tensor_ptr), tensor_ptr->numel(), true);
+    }
+    // paddle_mobile.GetResults(&v);
+    DLOG << "Computation done";
+    fpga::fpga_free(img);
+  }
+  return 0;
+}
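One of the new helpers, convert_to_chw, de-interleaves NHWC data into NCHW: the source is consumed sequentially while the destination index is n*C*H*W + c*H*W + h*W + w. A quick check of the index math on a 1x2x2x2 tensor (the main harness is mine; the helper body matches the test above):

#include <cassert>
#include <cstdint>

void convert_to_chw(int16_t **data_in, int channel, int height, int width,
                    int num, int16_t *data_tmp) {
  int64_t amount_per_side = width * height;
  for (int n = 0; n < num; n++)
    for (int h = 0; h < height; h++)
      for (int w = 0; w < width; w++)
        for (int c = 0; c < channel; c++)
          *(data_tmp + n * amount_per_side * channel + c * amount_per_side +
            width * h + w) = *((*data_in)++);
}

int main() {
  // NHWC source for H=W=C=2: value = 4*h + 2*w + c
  int16_t hwc[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  int16_t chw[8];
  int16_t *src = hwc;
  convert_to_chw(&src, 2, 2, 2, 1, chw);
  // NCHW result: channel-0 plane first, then channel-1 plane
  assert(chw[0] == 0 && chw[1] == 2 && chw[2] == 4 && chw[3] == 6);
  assert(chw[4] == 1 && chw[5] == 3 && chw[6] == 5 && chw[7] == 7);
  return 0;
}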
test/net/test_benchmark.cpp
@@ -36,7 +36,10 @@ int main(int argc, char* argv[]) {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
   paddle_mobile.SetThreadNum(thread_num);
   auto time1 = time();
-  if (paddle_mobile.Load(fluid_model, optimize)) {
+  //  if (paddle_mobile.Load(fluid_model, optimize, false, 1, true)) {
+  if (paddle_mobile.Load(std::string(fluid_model) + "/model",
+                         std::string(fluid_model) + "/params", optimize, false,
+                         1, true)) {
     auto time2 = time();
     std::cout << "load cost :" << time_diff(time1, time2) << "ms\n";
     paddle_mobile::framework::Tensor input;
@@ -51,14 +54,15 @@ int main(int argc, char* argv[]) {
     paddle_mobile::framework::DDim in_shape =
         paddle_mobile::framework::make_ddim(dims);
     SetupTensor<float>(&input, in_shape, 0.f, 255.f);
-    // warmup
-    for (int i = 0; i < 10; ++i) {
+    //  // warmup
+    for (int i = 0; i < 2; ++i) {
       paddle_mobile.Predict(input);
     }
     auto time3 = time();
     for (int i = 0; i < 10; ++i) {
       paddle_mobile.Predict(input);
     }
     auto time4 = time();
     std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms\n";
     std::ostringstream os("output tensor size: ");
@@ -68,7 +72,7 @@ int main(int argc, char* argv[]) {
       os << ", " << output->data<float>()[i];
     }
     std::string output_str = os.str();
-    std::cout << output_str << std::endl;
+    //  std::cout << output_str << std::endl;
   }
   return 0;
 }
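For reference, the combined-model Load overload used here takes the model and params paths separately; judging from the call site the trailing arguments are quantification, batch size, and LoD mode, but those names are my inference from the diff, not confirmed signatures. A usage sketch (needs the repo's headers; the model path is hypothetical):

#include <string>
#include "io/paddle_mobile.h"  // repo header; include path assumed from src/ layout

int main() {
  std::string fluid_model = "../models/mobilenet";  // hypothetical model dir
  bool optimize = true;
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> pm;
  // Trailing args mirror the benchmark's call: false, 1, true
  // (read here as quantification=false, batch_size=1, lod_mode=true).
  bool ok = pm.Load(fluid_model + "/model", fluid_model + "/params", optimize,
                    false, 1, true);
  return ok ? 0 : 1;
}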