PaddlePaddle / Paddle-Lite

Commit 4fb30240

Authored March 13, 2019 by hjchen2

update

Parent: 2bbf3ec6
Showing 25 changed files with 158 additions and 185 deletions (+158, -185)
src/common/log.h  (+2, -1)
src/framework/executor.cpp  (+0, -14)
src/framework/executor.h  (+0, -1)
src/io/api_paddle_mobile.cc  (+1, -1)
src/io/jni/paddle_mobile_jni.cpp  (+0, -2)
src/io/paddle_mobile.h  (+0, -1)
src/operators/fusion_deconv_add_bn_op.h  (+1, -1)
src/operators/fusion_deconv_add_bn_relu_op.h  (+1, -1)
src/operators/fusion_deconv_bn_relu_op.h  (+1, -1)
src/operators/kernel/central-arm-func/conv_arm_func.cpp  (+2, -0)
src/operators/kernel/fpga/V1/conv_kernel.cpp  (+2, -2)
src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp  (+2, -2)
src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp  (+2, -2)
src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp  (+2, -2)
src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp  (+2, -2)
src/operators/kernel/fpga/V1/fetch_kernel.cpp  (+9, -13)
src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp  (+1, -1)
src/operators/kernel/fpga/V1/pad2d_kernel.cpp  (+5, -5)
src/operators/kernel/fpga/V1/pool_kernel.cpp  (+1, -1)
src/operators/kernel/fpga/V1/sigmoid_kernel.cpp  (+1, -1)
src/operators/math/channel_wise.h  (+2, -2)
src/operators/math/gemm/cblas.cc  (+4, -0)
src/operators/pad2d_op.cpp  (+3, -0)
test/fpga/test_rfcn_api.cpp  (+106, -125)
test/net/test_benchmark.cpp  (+8, -4)
src/common/log.h

@@ -31,7 +31,8 @@ namespace paddle_mobile {
 #ifdef ANDROID
-extern const char *ANDROID_LOG_TAG;
+static const char *ANDROID_LOG_TAG =
+    "paddle_mobile LOG built on " __DATE__ " " __TIME__;
 #define ANDROIDLOGI(...)                                               \
   __android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, __VA_ARGS__); \
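A note on the new definition: __DATE__ and __TIME__ are standard predefined macros, and adjacent string literals concatenate at compile time, so every translation unit that includes this header now gets its own static copy of the tag stamped with its build time. The matching out-of-line definition is removed from src/io/jni/paddle_mobile_jni.cpp further down. A standalone sketch of the mechanism (demo names, not repo code):

#include <cstdio>

// Adjacent string literals concatenate at compile time; __DATE__ and
// __TIME__ expand to the build date/time of this translation unit.
static const char *kTag = "demo built on " __DATE__ " " __TIME__;

int main() {
  std::printf("%s\n", kTag);  // e.g. "demo built on Mar 13 2019 10:30:00"
  return 0;
}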
src/framework/executor.cpp

@@ -531,20 +531,6 @@ void Executor<Device, T>::FeedData(const std::vector<void *> &v) {
   }
 }
 
-template <typename Device, typename T>
-void Executor<Device, T>::FeedTensorData(const vector<framework::Tensor> &v) {
-  auto input_size = v.size();
-  int index = 0;
-  auto vars = program_.scope->VarContain("feed", &index);
-  PADDLE_MOBILE_ENFORCE(input_size == vars.size(),
-                        "input data number not correct");
-  for (int i = 0; i < input_size; i++) {
-    auto var = program_.scope->Var("feed", i + index);
-    auto feed_tensor = var->template GetMutable<LoDTensor>();
-    feed_tensor->ShareDataWith(v[i]);
-  }
-}
-
 template <typename Device, typename T>
 void Executor<Device, T>::GetResults(std::vector<void *> *v) {
   auto output_size = v->size();
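The deleted FeedTensorData looked up each "feed" variable in the scope and called ShareDataWith, aliasing the caller's buffer rather than copying it. Its declarations in executor.h and paddle_mobile.h go away below, and the lone call site in api_paddle_mobile.cc is commented out. A rough analogue of that sharing semantics, using hypothetical stand-in types:

#include <iostream>
#include <memory>
#include <vector>

// Stand-in Tensor: ShareDataWith hands over the reference-counted buffer,
// which is what the framework's Tensor::ShareDataWith provides.
struct Tensor {
  std::shared_ptr<std::vector<float>> data;
  void ShareDataWith(const Tensor &other) { data = other.data; }
};

int main() {
  Tensor src;
  src.data = std::make_shared<std::vector<float>>(4, 1.0f);
  Tensor feed_slot;
  feed_slot.ShareDataWith(src);         // aliases the buffer, no copy
  (*feed_slot.data)[0] = 7.0f;          // visible through src as well
  std::cout << (*src.data)[0] << "\n";  // prints 7
  return 0;
}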
src/framework/executor.h

@@ -53,7 +53,6 @@ class Executor {
   void InjectVariable(const Tensor &t, std::string var_name);
   void FeedData(const Tensor &t);
   void FeedData(const std::vector<void *> &v);
-  void FeedTensorData(const std::vector<framework::Tensor> &v);
   void GetResults(std::vector<void *> *v);
   void GetTensorResults(std::vector<framework::Tensor *> *v);
src/io/api_paddle_mobile.cc

@@ -146,7 +146,7 @@ void PaddleMobilePredictor<Device, T>::FeedPaddleTensors(
     tensors[i].init(typeid(float));
     ConvertPaddleTensors(inputs[i], &tensors[i]);
   }
-  paddle_mobile_->FeedTensorData(tensors);
+  //  paddle_mobile_->FeedTensorData(tensors);
 }
 
 template <typename Device, typename T>
src/io/jni/paddle_mobile_jni.cpp

@@ -39,8 +39,6 @@ using framework::Tensor;
 using paddle_mobile::CPU;
 using std::string;
 
-const char *ANDROID_LOG_TAG =
-    "paddle_mobile LOG built on " __DATE__ " " __TIME__;
 paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
 static std::mutex shared_mutex;
src/io/paddle_mobile.h

@@ -91,7 +91,6 @@ class PaddleMobile {
   void InjectVariable(const framework::Tensor &t, std::string var_name);
   void FeedData(const framework::Tensor &t);
   void FeedData(const std::vector<void *> &v);
-  void FeedTensorData(const std::vector<framework::Tensor> &v);
   void GetResults(std::vector<void *> *v);
   void GetTensorResults(std::vector<framework::Tensor *> *v);
src/operators/fusion_deconv_add_bn_op.h

@@ -57,7 +57,7 @@ class FusionDeconvAddBNOp : public framework::OperatorWithKernel<
   FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs,
                       const VariableNameMap &outputs,
                       const framework::AttributeMap &attrs,
-                      std::shared_ptr<framework::Scope> scope)
+                      framework::Scope *scope)
       : framework::OperatorWithKernel<
             DeviceType, FusionDeconvAddBNParam<DeviceType>,
             operators::DeconvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
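The same constructor-signature change, std::shared_ptr<framework::Scope> scope to framework::Scope *scope, repeats in the next two fusion ops. A minimal before/after sketch with a stand-in Scope type; presumably the framework retains ownership and the op only borrows the pointer, avoiding an atomic reference-count bump per constructed operator:

#include <memory>

struct Scope {};  // stand-in for framework::Scope

struct OpBefore {
  explicit OpBefore(std::shared_ptr<Scope> scope) : scope_(scope.get()) {}
  Scope *scope_;
};

struct OpAfter {
  explicit OpAfter(Scope *scope) : scope_(scope) {}  // borrowed, not owned
  Scope *scope_;
};

int main() {
  auto owned = std::make_shared<Scope>();
  OpBefore a(owned);       // copies the shared_ptr: refcount traffic per op
  OpAfter b(owned.get());  // passes a raw pointer: caller keeps ownership
  return a.scope_ == b.scope_ ? 0 : 1;
}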
src/operators/fusion_deconv_add_bn_relu_op.h

@@ -59,7 +59,7 @@ class FusionDeconvAddBNReluOp
   FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs,
                           const VariableNameMap &outputs,
                           const framework::AttributeMap &attrs,
-                          std::shared_ptr<framework::Scope> scope)
+                          framework::Scope *scope)
       : framework::OperatorWithKernel<
             DeviceType, FusionDeconvAddBNReluParam<DeviceType>,
             operators::DeconvAddBNReluKernel<DeviceType, T>>(
src/operators/fusion_deconv_bn_relu_op.h

@@ -56,7 +56,7 @@ class FusionDeconvBNReluOp
   FusionDeconvBNReluOp(const string &type, const VariableNameMap &inputs,
                        const VariableNameMap &outputs,
                        const framework::AttributeMap &attrs,
-                       std::shared_ptr<framework::Scope> scope)
+                       framework::Scope *scope)
       : framework::OperatorWithKernel<
             DeviceType, FusionDeconvBNReluParam<DeviceType>,
             operators::DeconvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
...
src/operators/kernel/central-arm-func/conv_arm_func.cpp
浏览文件 @
4fb30240
...
@@ -47,6 +47,7 @@ bool IsExpand(const std::vector<int64_t> &filter_dim,
...
@@ -47,6 +47,7 @@ bool IsExpand(const std::vector<int64_t> &filter_dim,
return
!
(
filter_1
&&
strides_1
&&
padding_0
&&
dilation_1
);
return
!
(
filter_1
&&
strides_1
&&
padding_0
&&
dilation_1
);
}
}
#ifdef PADDLE_MOBILE_CPU
template
<
typename
Itype
,
typename
Otype
>
template
<
typename
Itype
,
typename
Otype
>
void
GemmConv
(
const
ConvParam
<
CPU
>
&
param
)
{
void
GemmConv
(
const
ConvParam
<
CPU
>
&
param
)
{
const
Tensor
*
input
=
param
.
Input
();
const
Tensor
*
input
=
param
.
Input
();
...
@@ -241,6 +242,7 @@ template void GemmConv<int8_t, int32_t>(const ConvParam<CPU> ¶m);
...
@@ -241,6 +242,7 @@ template void GemmConv<int8_t, int32_t>(const ConvParam<CPU> ¶m);
template
void
DepthwiseConv3x3
<
int8_t
,
int32_t
>(
const
ConvParam
<
CPU
>
&
param
);
template
void
DepthwiseConv3x3
<
int8_t
,
int32_t
>(
const
ConvParam
<
CPU
>
&
param
);
template
void
DepthwiseConv5x5
<
int8_t
,
int32_t
>(
const
ConvParam
<
CPU
>
&
param
);
template
void
DepthwiseConv5x5
<
int8_t
,
int32_t
>(
const
ConvParam
<
CPU
>
&
param
);
#endif
#endif
#endif
}
// namespace operators
}
// namespace operators
}
// namespace paddle_mobile
}
// namespace paddle_mobile
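The new #ifdef PADDLE_MOBILE_CPU (paired with the second #endif at the bottom) fences the CPU convolution templates out of non-CPU builds. A self-contained illustration of the build-flag idiom, using a demo macro rather than the project's flag:

#include <iostream>

// Compile with -DDEMO_CPU to include the CPU-only path.
#ifdef DEMO_CPU
void GemmConvDemo() { std::cout << "CPU GemmConv path compiled in\n"; }
#endif

int main() {
#ifdef DEMO_CPU
  GemmConvDemo();
#else
  std::cout << "CPU path excluded from this build\n";
#endif
  return 0;
}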
src/operators/kernel/fpga/V1/conv_kernel.cpp

@@ -24,8 +24,8 @@ bool ConvKernel<FPGA, float>::Init(ConvParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::NONE;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto input = const_cast<LoDTensor *>(param->Input());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   int channel = out->dims()[1];
   auto bs_ptr =
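This Tensor* to LoDTensor* cast recurs across the FPGA V1 kernels below. LoDTensor derives from Tensor in the framework and carries fields the FPGA path reads (e.g. fpga_data_num, used in fetch_kernel.cpp later in this commit), so the kernels now cast the accessor result to the derived type. A toy version of the downcast with stand-in types:

#include <iostream>

// Stand-in types: LoDTensor extends Tensor with extra fields the FPGA
// kernels need; fpga_data_num is one such field in this diff.
struct Tensor {
  virtual ~Tensor() = default;
};
struct LoDTensor : Tensor {
  int fpga_data_num = 0;
};

int main() {
  LoDTensor t;
  const Tensor *held = &t;  // param accessors expose a const base pointer
  // Cast down to the concrete type (and away from const, as the kernels do):
  auto *usable = const_cast<LoDTensor *>(static_cast<const LoDTensor *>(held));
  usable->fpga_data_num = 42;
  std::cout << usable->fpga_data_num << "\n";
  return 0;
}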
src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp

@@ -27,10 +27,10 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::NONE;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   //  const Tensor *bias = param->Bias();
   //  auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   //  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp

@@ -27,10 +27,10 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::NONE;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   const Tensor *bias = param->InputBias();
   auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp

@@ -28,10 +28,10 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::LEAKYRELU;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   const Tensor *bias = param->InputBias();
   auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp

@@ -29,10 +29,10 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::LEAKYRELU;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   const Tensor *bias = param->InputBias();
   auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
src/operators/kernel/fpga/V1/fetch_kernel.cpp

@@ -57,13 +57,9 @@ void dealign(float *src, float *dst, int input_c, int input_h, int input_w) {
 }
 
 template <>
 void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
-  auto input = const_cast<Tensor *>(param.InputX());
-  if (input->type() == typeid(float)) {
-    int col = param.Col();
-    auto output = &(param.Out()->at(col));
-    output->ShareDataWith(*input);
+  auto input = const_cast<LoDTensor *>(param.InputX());
+  int col = param.Col();
+  LoDTensor *out = &param.Out()->at(col);
+  if (input->type() == typeid(float)) {
+    out->ShareDataWith(*input);
     return;
   }
 
   fpga::BypassArgs args = param.fpga_bypass_args;
   auto input_address = (input->data<half>());

@@ -71,7 +67,7 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   float *outdata_ptr =
       reinterpret_cast<float *>(param.fpga_bypass_args.output.address);
   const int num_th = 32;
-  if ((param.Out()->fpga_data_num) < num_th) {
+  if ((out->fpga_data_num) < num_th) {
     fpga::fpga_invalidate(input_address, (input->fpga_data_num) * sizeof(half));
     for (int idx = 0; idx < product(input->dims()); ++idx) {

@@ -81,14 +77,14 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   }
 
   fpga::PerformBypass(args);
-  auto outC = param.Out()->dims()[1];
-  auto outH = param.Out()->dims()[2];
-  auto outW = param.Out()->dims()[3];
+  auto outC = out->dims()[1];
+  auto outH = out->dims()[2];
+  auto outW = out->dims()[3];
   fpga::fpga_invalidate(param.fpga_bypass_args.output.address,
-                        param.Out()->fpga_data_num * sizeof(float));
-  if (param.Out()->fpga_data_num != product(input->dims())) {
+                        out->fpga_data_num * sizeof(float));
+  if (out->fpga_data_num != product(input->dims())) {
     float *data_tmp =
         reinterpret_cast<float *>(malloc(outC * outH * outW * sizeof(float)));
     dealign(outdata_ptr, data_tmp, outC, outH, outW);
src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp

@@ -25,7 +25,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
       paddle_mobile::fpga::LEAKYRELU;
   int16_t leaky_relu_negative_slope = 0;
   auto input_x = const_cast<LoDTensor *>(param->InputX());
-  auto filter = const_cast<Tensor *>(param->InputY());
+  auto filter = const_cast<LoDTensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   auto out = param->Out();
src/operators/kernel/fpga/V1/pad2d_kernel.cpp

@@ -16,8 +16,8 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 template <>
-bool Pad2dKernel<FPGA, float>::Init(Pad2dParam<FPGA> *param) {
-  Tensor *output = param->Out();
+bool Pad2DKernel<FPGA, float>::Init(Pad2DParam<FPGA> *param) {
+  Tensor *output = param->output_;
   fpga::format_fp16_ofm(output);
   return true;
 }

@@ -39,9 +39,9 @@ void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) {
   }
 }
 template <>
-void Pad2dKernel<FPGA, float>::Compute(const Pad2dParam<FPGA> &param) {
-  auto in_x = param.InputX();
-  auto out = param.Out();
+void Pad2DKernel<FPGA, float>::Compute(const Pad2DParam<FPGA> &param) {
+  auto in_x = param.input_;
+  auto out = param.output_;
   fpga::fpga_invalidate((void *)in_x->data<half>(),  // NOLINT
                         in_x->numel() * sizeof(half));
   pad2dFunc(in_x, out);
src/operators/kernel/fpga/V1/pool_kernel.cpp

@@ -68,7 +68,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
 template <>
 void PoolKernel<FPGA, float>::Compute(const PoolParam<FPGA> &param) {
-  auto *input = const_cast<Tensor *>(param.Input());
+  auto *input = const_cast<LoDTensor *>(param.Input());
   if (input->type() == typeid(float)) {
     auto *output = param.Output();
src/operators/kernel/fpga/V1/sigmoid_kernel.cpp

@@ -24,7 +24,7 @@ bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::SIGMOID;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->InputX());
+  auto input = const_cast<LoDTensor *>(param->InputX());
   auto input_ptr = input->data<half>();
   auto out = param->Out();
   fpga::format_fp16_ofm(out);
src/operators/math/channel_wise.h

@@ -33,7 +33,7 @@ void AddChannelWise(const framework::Tensor *input,
   // maybe check shape
   int batch_size = input->dims()[0];
   int channels = input->dims()[1];
-  size_t spatial_size = input->dims()[2] * input->dims()[3];
+  int spatial_size = input->dims()[2] * input->dims()[3];
   for (int batch = 0; batch < batch_size; ++batch) {
     for (int channel = 0; channel < channels; ++channel) {

@@ -88,7 +88,7 @@ void ScaleAddChannelWise(const framework::Tensor *input,
   // maybe check shape
   int batch_size = input->dims()[0];
   int channels = input->dims()[1];
-  size_t spatial_size = input->dims()[2] * input->dims()[3];
+  int spatial_size = input->dims()[2] * input->dims()[3];
   for (int batch = 0; batch < batch_size; ++batch) {
     for (int channel = 0; channel < channels; ++channel) {
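A plausible motivation for size_t to int here, inferred rather than stated by the commit: the adjacent loops use int counters, and mixing int with the unsigned size_t in comparisons invites -Wsign-compare warnings and implicit conversions. Sketch:

#include <cstdio>

int main() {
  int dims2 = 8, dims3 = 8;
  int spatial_size = dims2 * dims3;  // int matches the int loop counters
  int acc = 0;
  for (int i = 0; i < spatial_size; ++i) {  // no signed/unsigned mismatch
    acc += i;
  }
  std::printf("spatial_size=%d acc=%d\n", spatial_size, acc);
  return 0;
}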
src/operators/math/gemm/cblas.cc

@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+
 #pragma once
 
 #include "operators/math/gemm/cblas.h"

@@ -47,3 +49,5 @@ void cblas_sgemv(const bool trans, const int M, const int N, const float alpha,
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
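__ARM_NEON__ and __ARM_NEON are compiler-defined feature macros (older GCC spells it with trailing underscores; the ACLE standard spelling is __ARM_NEON), so testing both is the portable idiom; the guard keeps these NEON-backed routines out of non-ARM builds entirely. A standalone probe:

#include <iostream>

// Reports whether this translation unit was compiled with NEON available.
int main() {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
  std::cout << "NEON available: SIMD code path compiled in\n";
#else
  std::cout << "NEON unavailable: SIMD code path excluded\n";
#endif
  return 0;
}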
src/operators/pad2d_op.cpp

@@ -37,5 +37,8 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(pad2d, ops::Pad2DOp);
 #endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(pad2d, ops::Pad2DOp);
+#endif
 
 #endif  // PAD2D_OP
test/fpga/test_rfcn_api.cpp

@@ -12,16 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifndef PADDLE_MOBILE_FPGA
+#define PADDLE_MOBILE_FPGA
+#endif
+
+#include <fstream>
 #include <iostream>
-#include "io/paddle_inference_api.h"
-
-static const char *g_image = "../models/rfcn/data.bin";
-static const char *g_model = "../models/rfcn/model";
-static const char *g_param = "../models/rfcn/params";
+#include "../test_helper.h"
+#include "../test_include.h"
+#ifdef PADDLE_MOBILE_FPGA_V1
+#include "fpga/V1/api.h"
+#endif
+#ifdef PADDLE_MOBILE_FPGA_V2
+#include "fpga/V2/api.h"
+#endif
+
+#include <string>
 
 void readStream(std::string filename, char *buf) {
   std::ifstream in;

@@ -35,137 +37,116 @@ void readStream(std::string filename, char *buf) {
   auto length = in.tellg();    // report location (this is the length)
   in.seekg(0, std::ios::beg);  // go back to the beginning
   in.read(buf, length);
+  DLOG << length;
   in.close();
 }
 
-PaddleMobileConfig GetConfig() {
-  PaddleMobileConfig config;
-  config.precision = PaddleMobileConfig::FP32;
-  config.device = PaddleMobileConfig::kFPGA;
-  config.prog_file = g_model;
-  config.param_file = g_param;
-  config.thread_num = 1;
-  config.batch_size = 1;
-  config.optimize = true;
-  config.lod_mode = true;
-  config.quantification = false;
-  return config;
-}
-
-PaddleMobileConfig GetConfig1() {
-  PaddleMobileConfig config;
-  config.precision = PaddleMobileConfig::FP32;
-  config.device = PaddleMobileConfig::kFPGA;
-  config.model_dir = "../models/resnet50";
-  config.thread_num = 1;
-  config.batch_size = 1;
-  config.optimize = true;
-  config.quantification = false;
-  return config;
-}
-
-int main() {
-  open_device();
-  PaddleMobileConfig config = GetConfig();
-  auto predictor =
-      CreatePaddlePredictor<PaddleMobileConfig,
-                            PaddleEngineKind::kPaddleMobile>(config);
-
-  std::cout << "Finishing loading model" << std::endl;
-
-  float img_info[3] = {432, 1280, 1.0f};
-  int img_length = 432 * 1280 * 3;
-  auto img = reinterpret_cast<float *>(fpga_malloc(img_length * sizeof(float)));
-  readStream(g_image, reinterpret_cast<char *>(img));
-
-  std::cout << "Finishing initializing data" << std::endl;
-  struct PaddleTensor t_img_info, t_img;
-  t_img.dtypeid = typeid(float);
-  t_img_info.layout = LAYOUT_HWC;
-  t_img_info.shape = std::vector<int>({1, 3});
-  t_img_info.name = "Image information";
-  t_img_info.data.Reset(img_info, 3 * sizeof(float));
-  t_img.dtypeid = typeid(float);
-  t_img.layout = LAYOUT_HWC;
-  t_img.shape = std::vector<int>({1, 432, 1280, 3});
-  t_img.name = "Image information";
-  t_img.data.Reset(img, img_length * sizeof(float));
-  predictor->FeedPaddleTensors({t_img_info, t_img});
-
-  std::cout << "Finishing feeding data " << std::endl;
-
-  predictor->Predict_From_To(0, -1);
-  std::cout << "Finishing predicting " << std::endl;
-
-  std::vector<PaddleTensor> v;        // No need to initialize v
-  predictor->FetchPaddleTensors(&v);  // Old data in v will be cleared
-  std::cout << "Output number is " << v.size() << std::endl;
-  std::cout << "out[0] length " << v[0].data.length() << std::endl;
-  std::cout << "out[1] length " << v[1].data.length() << std::endl;
-  std::cout << "out[2] length " << v[2].data.length() << std::endl;
-
-  auto post_nms = v[0].data.length() / sizeof(float) / 8;
-  for (int num = 0; num < post_nms; num++) {
-    for (int i = 0; i < 8; i++) {
-      auto p = reinterpret_cast<float *>(v[0].data.data());
-      std::cout << p[num * 8 + i] << std::endl;
-    }
-  }
-  for (int num = 0; num < post_nms; num++) {
-    for (int i = 0; i < 8; i++) {
-      auto p = reinterpret_cast<float *>(v[1].data.data());
-      std::cout << p[num * 8 + i] << std::endl;
-    }
-  }
-  for (int num = 0; num < post_nms; num++) {
-    for (int i = 0; i < 4; i++) {
-      auto p = reinterpret_cast<float *>(v[2].data.data());
-      std::cout << p[num * 4 + i] << std::endl;
-    }
-  }
-  std::cout << "Finish getting vector values" << std::endl;
-
-  ////////////////////////////////////////////////////
-  PaddleTensor tensor;
-  predictor->GetPaddleTensor("fetch2", &tensor);
-  for (int i = 0; i < post_nms; i++) {
-    auto p = reinterpret_cast<float *>(tensor.data.data());
-    std::cout << p[+i] << std::endl;
-  }
-
-  //////////////////////////////////////////////////////
-  PaddleMobileConfig config1 = GetConfig1();
-  auto predictor1 =
-      CreatePaddlePredictor<PaddleMobileConfig,
-                            PaddleEngineKind::kPaddleMobile>(config1);
-
-  std::cout << "Finishing loading model" << std::endl;
-
-  int img_length1 = 224 * 224 * 3;
-  auto img1 =
-      reinterpret_cast<float *>(fpga_malloc(img_length1 * sizeof(float)));
-
-  std::cout << "Finishing initializing data" << std::endl;
-  struct PaddleTensor t_img1;
-  t_img1.dtypeid = typeid(float);
-  t_img1.layout = LAYOUT_HWC;
-  t_img1.shape = std::vector<int>({1, 224, 224, 3});
-  t_img1.name = "Image information";
-  t_img1.data.Reset(img1, img_length1 * sizeof(float));
-  predictor1->FeedPaddleTensors({t_img1});
-  predictor1->Predict_From_To(0, -1);
-  std::cout << "Finishing predicting " << std::endl;
-
-  std::vector<PaddleTensor> v1;         // No need to initialize v
-  predictor1->FetchPaddleTensors(&v1);  // Old data in v will be cleared
-  std::cout << "Output number is " << v1.size() << std::endl;
-  std::cout << "out[0] length " << v1[0].data.length() << std::endl;
-
-  return 0;
-}
+void convert_to_chw(int16_t **data_in, int channel, int height, int width,
+                    int num, int16_t *data_tmp) {
+  int64_t amount_per_side = width * height;
+  for (int n = 0; n < num; n++) {
+    for (int h = 0; h < height; h++) {
+      for (int w = 0; w < width; w++) {
+        for (int c = 0; c < channel; c++) {
+          *(data_tmp + n * amount_per_side * channel + c * amount_per_side +
+            width * h + w) = *((*data_in)++);
+        }
+      }
+    }
+  }
+}
+
+void dump_stride_half(std::string filename, Tensor input_tensor,
+                      const int dumpnum, bool use_chw) {
+  // bool use_chw = true;
+  if (input_tensor.dims().size() != 4) return;
+  int c = (input_tensor.dims())[1];
+  int h = (input_tensor.dims())[2];
+  int w = (input_tensor.dims())[3];
+  int n = (input_tensor.dims())[0];
+  auto data_ptr = input_tensor.get_data();
+  auto *data_ptr_16 = reinterpret_cast<half *>(data_ptr);
+  auto data_tmp = data_ptr_16;
+  if (use_chw) {
+    data_tmp =
+        reinterpret_cast<half *>(malloc(n * c * h * w * sizeof(int16_t)));
+    convert_to_chw(&data_ptr_16, c, h, w, n, data_tmp);
+  }
+  std::ofstream out(filename.c_str());
+  float result = 0;
+  int stride = input_tensor.numel() / dumpnum;
+  stride = stride > 0 ? stride : 1;
+  for (int i = 0; i < input_tensor.numel(); i += stride) {
+    result = paddle_mobile::fpga::fp16_2_fp32(data_tmp[i]);
+    out << result << std::endl;
+  }
+  out.close();
+  if (data_tmp != data_ptr_16) {
+    free(data_tmp);
+  }
+}
+
+void dump_stride_float(std::string filename, Tensor input_tensor,
+                       const int dumpnum) {
+  auto data_ptr = reinterpret_cast<float *>(input_tensor.get_data());
+  std::ofstream out(filename.c_str());
+  float result = 0;
+  int stride = input_tensor.numel() / dumpnum;
+  stride = stride > 0 ? stride : 1;
+  for (int i = 0; i < input_tensor.numel(); i += stride) {
+    result = data_ptr[i];
+    out << result << std::endl;
+  }
+  out.close();
+}
+
+void dump_stride(std::string filename, Tensor input_tensor, const int dumpnum,
+                 bool use_chw) {
+  static int i = 0;
+  if (input_tensor.numel() == 0) {
+    return;
+  }
+  if (input_tensor.type() == typeid(float)) {
+    DLOG << "op: " << i++ << ", float data  " << input_tensor.numel();
+    dump_stride_float(filename, input_tensor, dumpnum);
+  } else {
+    DLOG << "op: " << i++ << ", half data  " << input_tensor.numel();
+    dump_stride_half(filename, input_tensor, dumpnum, use_chw);
+  }
+  DLOG << "dump input address: " << input_tensor.get_data();
+}
+
+static const char *g_rfcn_combine = "../models/rfcn";
+static const char *g_image_src_float = "../models/rfcn/data.bin";
+int main() {
+  paddle_mobile::fpga::open_device();
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+  if (paddle_mobile.Load(std::string(g_rfcn_combine) + "/model",
+                         std::string(g_rfcn_combine) + "/params", true, false,
+                         1, true)) {
+    float img_info[3] = {768, 1536, 768.0f / 960.0f};
+    auto img = reinterpret_cast<float *>(
+        fpga::fpga_malloc(768 * 1536 * 3 * sizeof(float)));
+    readStream(g_image_src_float, reinterpret_cast<char *>(img));
+
+    std::vector<void *> v(3, nullptr);
+    paddle_mobile.FeedData(std::vector<void *>({img_info, img}));
+    paddle_mobile.Predict_To(-1);
+    for (int i = 65; i < 69; i++) {
+      auto tensor_ptr = paddle_mobile.FetchResult(i);
+      std::string saveName = "rfcn_" + std::to_string(i);
+      paddle_mobile::fpga::fpga_invalidate((*tensor_ptr).get_data(),
+                                           tensor_ptr->numel() * sizeof(float));
+      dump_stride(saveName, (*tensor_ptr), tensor_ptr->numel(), true);
+    }
+    // paddle_mobile.GetResults(&v);
+    DLOG << "Computation done";
+    fpga::fpga_free(img);
+  }
+  return 0;
+}
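The new dump_stride_* helpers all share one sampling idea: choose a stride so that at most dumpnum values are written, clamping the stride to at least 1. A condensed, self-contained version of that logic:

#include <iostream>
#include <vector>

// Condensed form of the dump_stride_float sampling loop from the new test:
// write at most `dumpnum` values, evenly strided across the tensor.
void dump_stride_demo(const std::vector<float> &data, int dumpnum) {
  int stride = static_cast<int>(data.size()) / dumpnum;
  stride = stride > 0 ? stride : 1;  // clamp, as in the test helpers
  for (int i = 0; i < static_cast<int>(data.size()); i += stride) {
    std::cout << data[i] << "\n";
  }
}

int main() {
  std::vector<float> t(100);
  for (int i = 0; i < 100; ++i) t[i] = 0.1f * i;
  dump_stride_demo(t, 10);  // prints every 10th element
  return 0;
}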
test/net/test_benchmark.cpp

@@ -36,7 +36,10 @@ int main(int argc, char* argv[]) {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
   paddle_mobile.SetThreadNum(thread_num);
   auto time1 = time();
-  if (paddle_mobile.Load(fluid_model, optimize)) {
+  //  if (paddle_mobile.Load(fluid_model, optimize, false, 1, true)) {
+  if (paddle_mobile.Load(std::string(fluid_model) + "/model",
+                         std::string(fluid_model) + "/params", optimize, false,
+                         1, true)) {
     auto time2 = time();
     std::cout << "load cost :" << time_diff(time1, time2) << "ms\n";
     paddle_mobile::framework::Tensor input;

@@ -51,14 +54,15 @@ int main(int argc, char* argv[]) {
   paddle_mobile::framework::DDim in_shape =
       paddle_mobile::framework::make_ddim(dims);
   SetupTensor<float>(&input, in_shape, 0.f, 255.f);
   // warmup
-  for (int i = 0; i < 10; ++i) {
+  for (int i = 0; i < 2; ++i) {
     paddle_mobile.Predict(input);
   }
   auto time3 = time();
   for (int i = 0; i < 10; ++i) {
     paddle_mobile.Predict(input);
   }
   auto time4 = time();
   std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms\n";
   std::ostringstream os("output tensor size: ");

@@ -68,7 +72,7 @@ int main(int argc, char* argv[]) {
     os << ", " << output->data<float>()[i];
   }
   std::string output_str = os.str();
-  std::cout << output_str << std::endl;
+  //   std::cout << output_str << std::endl;
   }
   return 0;
 }
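Two behavioral tweaks here: the benchmark now loads a combined model (separate model/params paths, matching the RFCN test above), and the warmup drops from 10 to 2 iterations before the timed loop of 10. A generic, self-contained version of the warmup-then-measure pattern (Predict is a placeholder workload, not the library call):

#include <chrono>
#include <iostream>

static volatile double sink = 0;

static void Predict() {  // placeholder workload standing in for inference
  double acc = 0;
  for (int i = 0; i < 1000000; ++i) acc += i * 0.5;
  sink = acc;
}

int main() {
  for (int i = 0; i < 2; ++i) Predict();  // warmup (the commit lowers 10 -> 2)
  auto time3 = std::chrono::steady_clock::now();
  for (int i = 0; i < 10; ++i) Predict();
  auto time4 = std::chrono::steady_clock::now();
  std::cout << "predict cost :"
            << std::chrono::duration<double, std::milli>(time4 - time3).count() / 10
            << "ms\n";
  return 0;
}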