PaddlePaddle / Paddle-Lite — commit 9729edac
Authored on Dec 15, 2018 by hjchen2
Support feed multi inputs and fetch multi outputs
Parent commit: f20c9041

Showing 24 changed files with 634 additions and 587 deletions (+634 −587)
src/framework/executor.cpp            +207  −290
src/framework/executor.h               +38   −49
src/framework/loader.cpp               +30   −40
src/framework/loader.h                 +21   −21
src/framework/lod_tensor.h             +27    −3
src/framework/program/program.h         +2    −3
src/framework/scope.h                   +1    −0
src/framework/tensor.h                  +0    −1
src/io/api_paddle_mobile.cc            +22   −22
src/io/api_paddle_mobile.h              +2    −2
src/io/ios_io/PaddleMobileCPU.mm        +4    −3
src/io/jni/paddle_mobile_jni.cpp       +14    −7
src/io/paddle_mobile.cpp              +108   −77
src/io/paddle_mobile.h                 +32   −22
src/io/paddle_test_inference_api.cpp    +9    −7
src/io/paddle_test_inference_api.h      +4    −1
test/CMakeLists.txt                     +4    −1
test/executor_for_test.h               +20   −27
test/net/test_benchmark.cpp             +3    −2
test/net/test_eng.cpp                   +2    −2
test/net/test_googlenet.cpp             +2    −2
test/net/test_nlp.cpp                   +4    −4
test/net/test_ocr.cpp                  +77    −0
tools/pre-commit.hooks/cpplint.hook     +1    −1
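The headline change in this commit is that Executor gains a named multi-input / multi-output interface (SetInput, Predict, GetOutput, plus Predict overloads over vectors of named tensors) in place of the old single-tensor Predict/PredictLod. A minimal usage sketch based on the interfaces shown in the diffs below; the model directory, the input/output variable names, and the include paths are illustrative placeholders, not part of the commit:

    #include "framework/executor.h"   // assumed include paths
    #include "framework/loader.h"

    using namespace paddle_mobile::framework;

    void RunMultiInputMultiOutput() {
      Loader<CPU, float> loader;
      // hypothetical separate-format model directory
      auto program = loader.Load("./some_model_dir");
      Executor<CPU, float> executor(program, /*batch_size=*/1,
                                    /*use_optimize=*/true, /*lod_mode=*/false);

      Tensor image, shape_info;               // fill with real input data first
      executor.SetInput(image, "image");      // feed each named input
      executor.SetInput(shape_info, "im_shape");
      if (executor.Predict() == PMSuccess) {  // run all ops once
        auto boxes = executor.GetOutput("boxes");    // fetch each named output
        auto scores = executor.GetOutput("scores");
      }
    }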
src/framework/executor.cpp  (+207 −290)

@@ -28,11 +28,6 @@ limitations under the License. */
 #include "framework/tensor.h"
 #include "memory/t_malloc.h"
-#ifdef PADDLE_EXECUTOR_MULTITHREAD
-#include <queue>
-#include "common/threadpool.h"
-#endif
 #ifdef PADDLE_MOBILE_CL
 #include "framework/cl/cl_image.h"
 #endif
...
@@ -40,66 +35,67 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {

 using framework::Variable;

 #pragma mark - executor
-template <typename Dtype, Precision P>
-Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
-                             const bool use_optimize, const bool loddable)
-    : program_(p),
+template <typename Device, typename T>
+Executor<Device, T>::Executor(const Program<Device> &program, int batch_size,
+                              const bool use_optimize, const bool lod_mode)
+    : program_(program),
       batch_size_(batch_size),
       use_optimize_(use_optimize),
-      loddable_(loddable) {
+      lod_mode_(lod_mode) {
+  DLOG << "executor in lod mode: " << lod_mode_;
   Variable *variable_ptr = program_.scope->Var("batch_size");
   variable_ptr->SetValue<int>(batch_size);
-  to_predict_program_ =
+  program_desc_ =
       use_optimize_ ? program_.optimizeProgram : program_.originProgram;
-  PADDLE_MOBILE_ENFORCE(to_predict_program_ != nullptr,
-                        "to_predict_program_ == NULL!");
-  const std::vector<std::shared_ptr<framework::BlockDesc>> &blocks =
-      to_predict_program_->Blocks();
-  DLOG << "executor in loaddable mode: " << loddable_;
+  PADDLE_MOBILE_ENFORCE(program_desc_ != nullptr,
+                        "program_desc_ should not be nullptr");
+  const auto &blocks = program_desc_->Blocks();
+  ops_of_block_.resize(blocks.size());
   for (int i = 0; i < blocks.size(); ++i) {
-    std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
-    std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
+    std::shared_ptr<BlockDesc> block_desc = blocks[i];
+    std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
     for (int j = 0; j < ops.size(); ++j) {
-      std::shared_ptr<framework::OpDesc> op = ops[j];
-      DLOG << "create op: " << op->Type();
-      auto op_base = framework::OpRegistry<Dtype>::CreateOp(
-          op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
-          program_.scope);
-      // infer shape to reshape tensor before predict,
-      // but for lod tensor, it will still need to reshape in runtime
-      if (!loddable_) {
-        op_base->InferShape();
+      std::shared_ptr<OpDesc> op_desc = ops[j];
+      DLOG << "create op: " << op_desc->Type();
+      auto op_handler = OpRegistry<Device>::CreateOp(
+          op_desc->Type(), op_desc->GetInputs(), op_desc->GetOutputs(),
+          op_desc->GetAttrMap(), program_.scope);
+      // infer shape to reshape inputs and outputs before predict,
+      // but for lod mode, it still need to infer shape in runtime
+      if (!lod_mode) {
+        op_handler->InferShape();
       }
-      ops_of_block_[*block_desc.get()].push_back(op_base);
+      ops_of_block_[i].push_back(op_handler);
     }
   }
   if (program_.combined) {
     InitCombineMemory();
   } else {
     InitMemory();
   }
-  std::shared_ptr<framework::BlockDesc> to_predict_block =
-      to_predict_program_->Block(0);
-  auto &ops = ops_of_block_[*to_predict_block.get()];
-  int i = 0;
-  for (const auto &op : ops) {
-    DLOG << "Initialize op[" << i++ << "]: " << op->Type();
-    op->Init();
+  int count = 0;
+  for (int block_id = 0; block_id < ops_of_block_.size(); ++block_id) {
+    for (auto &op_handler : ops_of_block_[block_id]) {
+      DLOG << "Initialize op[" << count++ << "]: " << op_handler->Type();
+      op_handler->Init();
+      ops_list_.push_back(op_handler);
+    }
   }
 }

-template <typename Dtype>
-static void LoadMemInternal(void **data, framework::LoDTensor *tensor,
+template <typename Device>
+static void LoadMemInternal(void **data, LoDTensor *tensor,
                             bool quant_uint8 = false) {
   char **data_buf = reinterpret_cast<char **>(data);
   int64_t size = tensor->numel();
-  Dtype *tensor_data = tensor->mutable_data<Dtype>();
+  Device *tensor_data = tensor->mutable_data<Device>();
   if (quant_uint8) {
     // should be moved into operator init function
     float min_value;
...
@@ -114,15 +110,15 @@ static void LoadMemInternal(void **data, framework::LoDTensor *tensor,
     data_buf += size * sizeof(uint8_t);
   } else {
-    memory::Copy(tensor_data, *data_buf, size * sizeof(Dtype));
-    *data_buf += size * sizeof(Dtype);
+    memory::Copy(tensor_data, *data_buf, size * sizeof(Device));
+    *data_buf += size * sizeof(Device);
   }
 }

-template <typename Dtype, Precision P>
-void Executor<Dtype, P>::LoadMemory(
-    void **data, const std::shared_ptr<framework::VarDesc> var_desc,
-    framework::LoDTensor *tensor) {
+template <typename Device, typename T>
+void Executor<Device, T>::LoadMemory(
+    void **data, const std::shared_ptr<VarDesc> var_desc, LoDTensor *tensor) {
   char **data_buf = reinterpret_cast<char **>(data);
   // version
   uint32_t version = *(reinterpret_cast<uint32_t *>(*data_buf));
...
@@ -152,18 +148,18 @@ void Executor<Dtype, P>::LoadMemory(
   // skip tensor desc
   *data_buf += tensor_desc_size;
-  const framework::TensorDesc &tensor_desc = var_desc->Tensor_desc();
-  tensor->Resize(framework::make_ddim(tensor_desc.Dims()));
+  const TensorDesc &tensor_desc = var_desc->Tensor_desc();
+  tensor->Resize(make_ddim(tensor_desc.Dims()));
   // parse tensor from stream
   switch (tensor_desc.DataType()) {
-    case framework::VARTYPE_TYPE_FP32:
+    case VARTYPE_TYPE_FP32:
       LoadMemInternal<float>(reinterpret_cast<void **>(data_buf), tensor,
                              program_.quantification);
       break;
-    case framework::VARTYPE_TYPE_INT8:
+    case VARTYPE_TYPE_INT8:
       LoadMemInternal<int8_t>(reinterpret_cast<void **>(data_buf), tensor);
       break;
-    case framework::VARTYPE_TYPE_INT32:
+    case VARTYPE_TYPE_INT32:
       LoadMemInternal<int>(reinterpret_cast<void **>(data_buf), tensor);
       break;
     default:
...
@@ -171,12 +167,12 @@ void Executor<Dtype, P>::LoadMemory(
-template <typename Dtype, Precision P>
-void Executor<Dtype, P>::InitMemory() {
-  for (const auto &block : to_predict_program_->Blocks()) {
+template <typename Device, typename T>
+void Executor<Device, T>::InitMemory() {
+  for (const auto &block : program_desc_->Blocks()) {
     for (const auto &var_desc : block->Vars()) {
       auto var = program_.scope->Var(var_desc->Name());
-      auto tensor = var->template GetMutable<framework::LoDTensor>();
+      auto tensor = var->template GetMutable<LoDTensor>();
       if (var_desc->Persistable()) {
         if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
           continue;
...
@@ -187,7 +183,7 @@ void Executor<Dtype, P>::InitMemory() {
         LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
         delete[] origin_data;
       } else {
-        if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
+        if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
           varInputMemory(var_desc, var, tensor);
         }
       }
...
@@ -195,8 +191,8 @@ void Executor<Dtype, P>::InitMemory() {
-template <typename Dtype, Precision P>
-void Executor<Dtype, P>::InitCombineMemory() {
+template <typename Device, typename T>
+void Executor<Device, T>::InitCombineMemory() {
   char *origin_data = nullptr;
   bool self_alloc = false;
   if (program_.combined_params_buf && program_.combined_params_len) {
...
@@ -208,17 +204,17 @@ void Executor<Dtype, P>::InitCombineMemory() {
   PADDLE_MOBILE_ENFORCE(origin_data != nullptr, "data == nullptr");
   char *data = origin_data;
-  for (const auto &block : to_predict_program_->Blocks()) {
+  for (const auto &block : program_desc_->Blocks()) {
     for (const auto &var_desc : block->Vars()) {
       auto var = program_.scope->Var(var_desc->Name());
-      auto tensor = var->template GetMutable<framework::LoDTensor>();
+      auto tensor = var->template GetMutable<LoDTensor>();
       if (var_desc->Persistable()) {
         if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
           continue;
         }
         LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
       } else {
-        if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
+        if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
           varInputMemory(var_desc, var, tensor);
         }
       }
...
@@ -230,168 +226,132 @@ void Executor<Dtype, P>::InitCombineMemory() {
   LOG(kLOG_INFO) << "init combine memory finish";
 }

-template <typename Dtype, Precision P>
-bool Executor<Dtype, P>::varInputMemory(
-    const std::shared_ptr<framework::VarDesc> &var_desc, Variable *var,
-    framework::LoDTensor *tensor) const {
+template <typename Device, typename T>
+bool Executor<Device, T>::varInputMemory(
+    const std::shared_ptr<VarDesc> &var_desc, Variable *var,
+    LoDTensor *tensor) const {
   auto type = var_desc->Tensor_desc().DataType();
   switch (type) {
-    case framework::VARTYPE_TYPE_FP32:
+    case VARTYPE_TYPE_FP32:
       tensor->mutable_data<float>();
       break;
-    case framework::VARTYPE_TYPE_INT8:
+    case VARTYPE_TYPE_INT8:
       tensor->mutable_data<int8_t>();
       break;
-    case framework::VARTYPE_TYPE_INT32:
+    case VARTYPE_TYPE_INT32:
       tensor->mutable_data<int32_t>();
       break;
-    case framework::VARTYPE_TYPE_INT64:
+    case VARTYPE_TYPE_INT64:
       tensor->mutable_data<int64_t>();
       break;
     default:
       break;
   }
-  bool is_mute_match = (type == framework::VARTYPE_TYPE_FP32) ||
-                       (type == framework::VARTYPE_TYPE_INT8) ||
-                       (type == framework::VARTYPE_TYPE_INT32) ||
-                       (type == framework::VARTYPE_TYPE_INT64);
+  bool is_mute_match =
+      (type == VARTYPE_TYPE_FP32) || (type == VARTYPE_TYPE_INT8) ||
+      (type == VARTYPE_TYPE_INT32) || (type == VARTYPE_TYPE_INT64);
   PADDLE_MOBILE_ENFORCE(is_mute_match, "got unhandled data type : %d", type);
   return is_mute_match;
 }

-template <typename Dtype, Precision P>
-std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
-    const framework::Tensor &t) {
-  framework::Variable *g_feed_value = program_.scope->Var("feed");
-  framework::Tensor *feed_tensor =
-      g_feed_value->GetMutable<framework::LoDTensor>();
-  feed_tensor->Resize(t.dims());
-  feed_tensor->ShareDataWith(t);
-  std::shared_ptr<framework::BlockDesc> to_predict_block =
-      to_predict_program_->Block(0);
-  auto &ops = ops_of_block_[*to_predict_block.get()];
-  for (int i = 0; i < ops.size(); i++) {
-    if (loddable_) {
-      ops[i]->InferShape();
-    }
-    // to Run
-    ops[i]->Run();
-  }
-  auto last_op = ops.rbegin();
-  auto output_map = (*last_op)->Outputs();
-  std::vector<std::string> out_keys = (*last_op)->GetOutKeys();
-  PADDLE_MOBILE_ENFORCE(out_keys.size() > 0, "the last op contains no output");
-  framework::LoDTensor *output_tensor =
-      framework::GetVarValue<framework::LoDTensor>(out_keys[0], output_map,
-                                                   *(program_.scope));
-  ...  (per-op PADDLE_MOBILE_PROFILE timing, conv2d kernel-size breakdown, printf report)
-  return std::make_shared<framework::Tensor>(
-      framework::Tensor(*output_tensor));
-}
-
-template <typename Dtype, Precision P>
-std::shared_ptr<framework::LoDTensor> Executor<Dtype, P>::PredictLod(
-    const framework::LoDTensor &t) {
-  framework::Variable *g_feed_value = program_.scope->Var("feed");
-  framework::LoDTensor *feed_tensor =
-      g_feed_value->GetMutable<framework::LoDTensor>();
-  feed_tensor->Resize(t.dims());
-  feed_tensor->ShareDataWith(t);
-  feed_tensor->set_lod(t.lod());
-  std::shared_ptr<framework::BlockDesc> to_predict_block =
-      to_predict_program_->Block(0);
-  auto &ops = ops_of_block_[*to_predict_block.get()];
-  ...  (same op loop and profiling as Predict above)
+template <typename Device, typename T>
+PMStatus Executor<Device, T>::Predict(
+    const std::vector<std::pair<std::string, Tensor>> &inputs) {
+  for (const auto &input : inputs) {
+    SetInput(input.second, input.first);
+  }
+  return this->Predict();
+}
+
+template <typename Device, typename T>
+PMStatus Executor<Device, T>::Predict(
+    const std::vector<std::pair<std::string, LoDTensor>> &inputs) {
+  for (const auto &input : inputs) {
+    SetInput(input.second, input.first);
+  }
+  return this->Predict();
+}
+
+template <typename Device, typename T>
+std::vector<T> Executor<Device, T>::Predict(const std::vector<T> &input,
+                                            const std::vector<int64_t> &dims) {
+  Tensor feed_tensor(input, make_ddim(dims));
+  SetInput(feed_tensor, "feed");
+  std::vector<T> output;
+  if (this->Predict() == PMSuccess) {
+    const auto output_tensor = GetOutput("fetch");
+    output.resize(output_tensor->numel());
+    memcpy(output.data(), output_tensor->template data<T>(),
+           output.size() * sizeof(T));
+  }
+  return output;
+}
+
+template <typename Device, typename T>
+void Executor<Device, T>::SetInput(const Tensor &input,
+                                   const std::string &var_name) {
+  auto *target_var = program_.scope->FindVar(var_name);
+  PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
+                        var_name.c_str());
+  auto *target_tensor = target_var->template GetMutable<LoDTensor>();
+  target_tensor->Resize(input.dims());
+  target_tensor->ShareDataWith(input);
+}
+
+template <typename Device, typename T>
+void Executor<Device, T>::SetInput(const LoDTensor &input,
+                                   const std::string &var_name) {
+  auto *target_var = program_.scope->FindVar(var_name);
+  PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
+                        var_name.c_str());
+  auto *target_tensor = target_var->template GetMutable<LoDTensor>();
+  target_tensor->Resize(input.dims());
+  target_tensor->ShareDataWith(input);
+  target_tensor->set_lod(input.lod());
+}
+
+template <typename Device, typename T>
+PMStatus Executor<Device, T>::Predict() {
+#ifdef PADDLE_MOBILE_PROFILE
+  std::vector<ProfInfo> profile(ops_list_.size());
+  struct timespec ts;
+  int op_index = 0;
+#endif
+  for (auto &block : ops_of_block_) {
+    for (auto &op_handler : block) {
+#ifdef PADDLE_MOBILE_PROFILE
+      clock_gettime(CLOCK_MONOTONIC, &ts);
+      profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
+#endif
+      if (lod_mode_) {
+        op_handler->InferShape();
+      }
+      op_handler->Run();
+#ifdef PADDLE_MOBILE_PROFILE
+      clock_gettime(CLOCK_MONOTONIC, &ts);
+      profile[op_index].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
+      ++op_index;
+#endif
+    }
+  }
+#ifdef PADDLE_MOBILE_PROFILE
+  std::unordered_map<std::string, uint64_t> _tp;
+  for (int i = 0; i < profile.size(); i++) {
+    const auto &pInfo = profile[i];
+    uint64_t timeCost = pInfo.runEnd - pInfo.runBegin;
+    if (ops_list_[i]->Type() == "conv2d" ||
+        ops_list_[i]->Type() == "depthwise_conv2d") {
+      auto inputs = ops_list_[i]->Inputs();
+      auto *filter =
+          GetVarValue<LoDTensor>("Filter", inputs, *(program_.scope));
+      int kernel_size = filter->dims()[2];
+      printf("kernel size: %d\n", kernel_size);
+      _tp[ops_list_[i]->Type() + "_" + std::to_string(kernel_size)] += timeCost;
+    } else {
+      _tp[ops_list_[i]->Type()] += timeCost;
+    }
+  }
+  DLOG << "====================[ profile ]======================";
+  typedef std::pair<std::string, uint64_t> prof_t;
+  std::vector<prof_t> _tv(_tp.begin(), _tp.end());
+  uint64_t _ptotal = 0;
+  for (auto const &p : _tv) {
...
@@ -407,57 +367,39 @@ std::shared_ptr<framework::LoDTensor> Executor<Dtype, P>::PredictLod(
           static_cast<float>(p.second),
           static_cast<float>(p.second) / _ptotal * 100.0);
   }
-  printf("====================[---------]======================\n");
+  DLOG << "====================[---------]======================";
 #endif
-  return std::make_shared<framework::LoDTensor>(
-      framework::LoDTensor(*output_tensor));
+  return PMSuccess;
 }

-template <typename Dtype, Precision P>
-std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
-    const framework::Tensor &t, int block_id) {
-  return Predict(t);
-}
-
-template <typename Dtype, Precision P>
-std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
-    const std::vector<Ptype> &input, const std::vector<int64_t> &dims) {
-  framework::Tensor tensor(input, framework::make_ddim(dims));
-  std::shared_ptr<framework::Tensor> output_tensor = Predict(tensor, 0);
-  if (output_tensor != nullptr) {
-    Executor<Dtype, P>::Ptype *output_ptr =
-        output_tensor->data<typename Executor<Dtype, P>::Ptype>();
-    std::vector<typename Executor<Dtype, P>::Ptype> result_vector;
-    for (int j = 0; j < output_tensor->numel(); ++j) {
-      result_vector.push_back(output_ptr[j]);
-    }
-    return result_vector;
-  } else {
-    DLOG << "return empty vector";
-    return {};
-  }
-}
+template <typename Device, typename T>
+std::shared_ptr<LoDTensor> Executor<Device, T>::GetOutput(
+    const std::string &var_name) {
+  auto *target_var = program_.scope->FindVar(var_name);
+  PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
+                        var_name.c_str());
+  auto *output_tensor = target_var->template GetMutable<LoDTensor>();
+  return std::make_shared<LoDTensor>(*output_tensor);
+}

 #ifdef PADDLE_MOBILE_FPGA
-template <typename Dtype, Precision P>
-void Executor<Dtype, P>::InjectVariable(const framework::Tensor &t,
-                                        std::string var_name) {
-  framework::Variable *g_feed_value = program_.scope->Var(var_name);
-  framework::Tensor *feed_tensor =
-      g_feed_value->GetMutable<framework::LoDTensor>();
+template <typename Device, typename T>
+void Executor<Device, T>::InjectVariable(const Tensor &t,
+                                         std::string var_name) {
+  Variable *g_feed_value = program_.scope->Var(var_name);
+  Tensor *feed_tensor = g_feed_value->GetMutable<LoDTensor>();
   feed_tensor->Resize(t.dims());
   feed_tensor->ShareDataWith(t);
 }

-template <typename Dtype, Precision P>
-void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
+template <typename Device, typename T>
+void Executor<Device, T>::FeedData(const Tensor &t) {
   InjectVariable(t, "feed");
 }

-template <typename Dtype, Precision P>
-std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
-  std::shared_ptr<framework::BlockDesc> to_predict_block =
-      to_predict_program_->Block(0);
+template <typename Device, typename T>
+std::shared_ptr<Tensor> Executor<Device, T>::FetchResult(int id) {
+  std::shared_ptr<BlockDesc> to_predict_block = program_desc_->Block(0);
   auto &ops = ops_of_block_[*to_predict_block.get()];
   PADDLE_MOBILE_ENFORCE(id < (int)ops.size(), "Index out of range");
...
@@ -465,15 +407,14 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
   auto output_map = op->Outputs();
   std::vector<std::string> out_keys = op->GetOutKeys();
   PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "this op contains no output");
-  auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
-      out_keys[0], output_map, *(program_.scope));
-  return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor));
+  auto *output_tensor =
+      GetVarValue<LoDTensor>(out_keys[0], output_map, *(program_.scope));
+  return std::make_shared<Tensor>(Tensor(*output_tensor));
 }

-template <typename Dtype, Precision P>
-void Executor<Dtype, P>::Predict_From_To(int start, int end) {
-  std::shared_ptr<framework::BlockDesc> to_predict_block =
-      to_predict_program_->Block(0);
+template <typename Device, typename T>
+void Executor<Device, T>::Predict_From_To(int start, int end) {
+  std::shared_ptr<BlockDesc> to_predict_block = program_desc_->Block(0);
   auto &ops = ops_of_block_[*to_predict_block.get()];
   end = end < 0 ? static_cast<int>(ops.size()) : end;
   PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(),
...
@@ -498,25 +439,26 @@ void Executor<Dtype, P>::Predict_From_To(int start, int end) {
-template <typename Dtype, Precision P>
-void Executor<Dtype, P>::Predict_From(int start) {
+template <typename Device, typename T>
+void Executor<Device, T>::Predict_From(int start) {
   Predict_From_To(start);
 }

-template <typename Dtype, Precision P>
-void Executor<Dtype, P>::Predict_To(int end) {
+template <typename Device, typename T>
+void Executor<Device, T>::Predict_To(int end) {
   Predict_From_To(0, end);
 }
 #endif

 #ifdef PADDLE_MOBILE_CL
-template <typename Dtype, Precision P>
-void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
-                                    float *tensorInput, char **data) {}
+template <typename Device, typename T>
+void Executor<Device, T>::LoadMemory(const VarDesc var_desc,
+                                     float *tensorInput, char **data) {}

 template <>
 void Executor<GPU_CL, Precision::FP32>::LoadMemory(
-    const framework::VarDesc var_desc, float *tensorInput, char **data) {
+    const VarDesc var_desc, float *tensorInput, char **data) {
   // 1. version
   uint32_t version = *reinterpret_cast<uint32_t *>(*data);
...
@@ -554,38 +496,13 @@ void Executor<GPU_CL, Precision::FP32>::LoadMemory(
   (*data) += (sizeof(char) * size);
-  const framework::TensorDesc &desc = var_desc.Tensor_desc();
+  const TensorDesc &desc = var_desc.Tensor_desc();
   int memory_size = 1;
   for (auto l : desc.Dims()) {
     memory_size *= l;
   }
   void *memory = nullptr;
-  // (commented-out switch over desc.DataType() setting type_size for FP16/FP32/
-  //  FP64/INT32/INT64/BOOL removed)
   int type_size = 4;
   memory = tensorInput;
   if (program_.quantification) {
...
@@ -616,24 +533,24 @@ void Executor<GPU_CL, Precision::FP32>::LoadMemory(
 template <>
-void Executor<GPU_CL, Precision::FP32>::InitMemory() {
-  for (const auto &block : to_predict_program_->Blocks()) {
+void Executor<GPU_CL, float>::InitMemory() {
+  for (const auto &block : program_desc_->Blocks()) {
     for (const auto &var_desc : block->Vars()) {
       auto var = program_.scope->Var(var_desc->Name());
       if (var_desc->Persistable()) {
         CLImage *cl_image = nullptr;
         if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
-          var->template GetMutable<framework::LoDTensor>();
+          var->template GetMutable<LoDTensor>();
           continue;
         } else {
-          cl_image = var->template GetMutable<framework::CLImage>();
+          cl_image = var->template GetMutable<CLImage>();
         }
         char *origin_data =
             ReadFileToBuff(program_.model_path + "/" + var_desc->Name());
         char *data = origin_data;
         cl_context context = program_.scope->GetCLScpoe()->Context();
-        const framework::TensorDesc &desc = var_desc->Tensor_desc();
+        const TensorDesc &desc = var_desc->Tensor_desc();
         int numel = 1;
         for (auto l : desc.Dims()) {
           numel *= l;
...
@@ -643,7 +560,7 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
             paddle_mobile::memory::Alloc(sizeof(float) * numel));
         LoadMemory(*var_desc, tensorInput, &data);
-        framework::DDim ddim = framework::make_ddim(desc.Dims());
+        DDim ddim = make_ddim(desc.Dims());
         // has not init
         cl_image->SetTensorData(tensorInput, ddim);
...
@@ -651,15 +568,15 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
         delete origin_data;
         paddle_mobile::memory::Free(tensorInput);
       } else {
-        if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
-          auto cl_image = var->template GetMutable<framework::CLImage>();
+        if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
+          auto cl_image = var->template GetMutable<CLImage>();
           cl_context context = program_.scope->GetCLScpoe()->Context();
           cl_command_queue command_queue =
               program_.scope->GetCLScpoe()->CommandQueue();
-          const framework::TensorDesc &desc = var_desc->Tensor_desc();
-          // framework::DDim ddim = framework::make_ddim(desc.Dims());
-          framework::DDim ddim = cl_image->dims();
+          const TensorDesc &desc = var_desc->Tensor_desc();
+          // DDim ddim = make_ddim(desc.Dims());
+          DDim ddim = cl_image->dims();
           DLOG << var_desc->Name();
           cl_image->InitEmptyImage(context, command_queue, ddim);
         }
...
@@ -669,7 +586,7 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
 template <>
-void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
+void Executor<GPU_CL, float>::InitCombineMemory() {
   char *origin_data = nullptr;
   bool self_alloc = false;
   if (program_.combined_params_buf && program_.combined_params_len) {
...
@@ -683,22 +600,22 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
   PADDLE_MOBILE_ENFORCE(origin_data != nullptr, "origin_data==nullptr!!!");
   float *data = reinterpret_cast<float *>(origin_data);
-  for (const auto &block : to_predict_program_->Blocks()) {
+  for (const auto &block : program_desc_->Blocks()) {
     for (const auto &var_desc : block->Vars()) {
       auto var = program_.scope->Var(var_desc->Name());
       if (var_desc->Persistable()) {
         CLImage *cl_image = nullptr;
         if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
-          var->template GetMutable<framework::LoDTensor>();
+          var->template GetMutable<LoDTensor>();
           continue;
         } else {
-          cl_image = var->template GetMutable<framework::CLImage>();
+          cl_image = var->template GetMutable<CLImage>();
         }
         cl_context context = program_.scope->GetCLScpoe()->Context();
-        const framework::TensorDesc &desc = var_desc->Tensor_desc();
-        framework::DDim ddim = framework::make_ddim(desc.Dims());
+        const TensorDesc &desc = var_desc->Tensor_desc();
+        DDim ddim = make_ddim(desc.Dims());
         int numel = 1;
         for (int i = 0; i < ddim.size(); i++) {
...
@@ -713,13 +630,13 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
         paddle_mobile::memory::Free(tensorInput);
       } else {
-        auto cl_image = var->template GetMutable<framework::CLImage>();
+        auto cl_image = var->template GetMutable<CLImage>();
         cl_context context = program_.scope->GetCLScpoe()->Context();
         cl_command_queue command_queue =
             program_.scope->GetCLScpoe()->CommandQueue();
-        const framework::TensorDesc &desc = var_desc->Tensor_desc();
-        framework::DDim ddim = cl_image->dims();
-        // framework::DDim ddim = framework::make_ddim(desc.Dims());
+        const TensorDesc &desc = var_desc->Tensor_desc();
+        DDim ddim = cl_image->dims();
+        // DDim ddim = make_ddim(desc.Dims());
         cl_image->InitEmptyImage(context, command_queue, ddim);
       }
     }
...
@@ -732,13 +649,13 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
 #endif

-template class Executor<CPU, Precision::FP32>;
-template class Executor<FPGA, Precision::FP32>;
-template class Executor<GPU_CL, Precision::FP32>;
-template class Executor<GPU_MALI, Precision::FP32>;
+template class Executor<CPU, float>;
+template class Executor<FPGA, float>;
+template class Executor<GPU_CL, float>;
+template class Executor<GPU_MALI, float>;

 }  // namespace framework
 }  // namespace paddle_mobile
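Callers that prefer to hand over all inputs in one call can use the new Predict overload added above, which simply loops SetInput over the name/tensor pairs and then runs Predict(). A short sketch, with hypothetical variable names and an executor assumed to be constructed as in the earlier example:

    std::vector<std::pair<std::string, Tensor>> inputs;
    inputs.emplace_back("image", image_tensor);    // hypothetical input names
    inputs.emplace_back("im_info", info_tensor);
    PMStatus status = executor.Predict(inputs);    // SetInput per pair, then Predict()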
src/framework/executor.h  (+38 −49)

@@ -17,6 +17,7 @@ limitations under the License. */
 #include <map>
 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 #include "common/types.h"
 #include "common/util.h"
...
@@ -28,41 +29,29 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {

-template <typename Dtype = CPU, Precision P = Precision::FP32>
+template <typename Device, typename T = float>
 class Executor {
  public:
-  typedef typename PrecisionTrait<P>::ptype Ptype;
-
-  // exector constructor
-  // @param program program converted from proto program in PaddlePaddle
-  // @param use_optimize bool whether use operator fusion to speed up or not
-  // @param loddable bool
-  Executor(const framework::Program<Dtype> program, int batch_size = 1,
-           const bool use_optimize = true, const bool loddable = false);
-
-  // predict with tensor input
-  // @param t input tensor to do prediction
-  // @return predicted tensor
-  std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t);
-
-  // predict with lod tensor input
-  // @param t input lod tensor to do prediction
-  // @return predicted lod tensor
-  std::shared_ptr<framework::LoDTensor> PredictLod(
-      const framework::LoDTensor &t);
-
-  // predict with vector input and dims
-  // @param input vector whose elements will be formed
-  // @param input tensor shape
-  // @param dims vector whose elements will be formed
-  // @return vector which is flatted from predicted tensor
-  std::vector<Ptype> Predict(const std::vector<Ptype> &input,
-                             const std::vector<int64_t> &dims);
+  Executor(const Program<Device> &program, int batch_size = 1,
+           const bool use_optimize = true, const bool lod_mode = false);
+
+  PMStatus Predict(const std::vector<std::pair<std::string, Tensor>> &inputs);
+  PMStatus Predict(
+      const std::vector<std::pair<std::string, LoDTensor>> &inputs);
+
+  std::vector<T> Predict(const std::vector<T> &input,
+                         const std::vector<int64_t> &dims);
+  PMStatus Predict();
+
+  void SetInput(const Tensor &input, const std::string &var_name);
+  void SetInput(const LoDTensor &input, const std::string &var_name);
+  std::shared_ptr<LoDTensor> GetOutput(const std::string &var_name);

 #ifdef PADDLE_MOBILE_FPGA
-  void InjectVariable(const framework::Tensor &t, std::string var_name);
-  void FeedData(const framework::Tensor &t);
-  std::shared_ptr<framework::Tensor> FetchResult(int id = -1);
+  void InjectVariable(const Tensor &t, std::string var_name);
+  void FeedData(const Tensor &t);
+  std::shared_ptr<Tensor> FetchResult(int id = -1);
   void Predict_From_To(int start = 0, int end = -1);
   void Predict_From(int start);
   void Predict_To(int end);
...
@@ -70,26 +59,28 @@ class Executor {
  protected:
   Executor() = default;
-  std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t,
-                                             int block_id);
-  bool varInputMemory(const std::shared_ptr<framework::VarDesc> &var_desc,
-                      framework::Variable *var,
-                      framework::LoDTensor *tensor) const;
+
+  bool varInputMemory(const std::shared_ptr<VarDesc> &var_desc, Variable *var,
+                      LoDTensor *tensor) const;
   void InitMemory();
   void InitCombineMemory();
   void LoadMemory(void **data,
-                  const std::shared_ptr<framework::VarDesc> var_desc,
-                  framework::LoDTensor *tensor);
+                  const std::shared_ptr<VarDesc> var_desc, LoDTensor *tensor);
 #ifdef PADDLE_MOBILE_CL
-  void LoadMemory(const framework::VarDesc var_desc, float *tensorInput,
-                  char **data);
+  void LoadMemory(const VarDesc var_desc, float *tensorInput, char **data);
 #endif

-  framework::Program<Dtype> program_;
-  int batch_size_ = 1;
-  std::shared_ptr<framework::ProgramDesc> to_predict_program_;
-  std::map<framework::BlockDesc,
-           std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>>
-      ops_of_block_;
+  int batch_size_;
+  bool use_optimize_;
+  bool lod_mode_;
+  Program<Device> program_;
+  std::shared_ptr<ProgramDesc> program_desc_;
+  typedef std::shared_ptr<OperatorBase<Device>> OperatorBasePtr;
+  std::vector<std::vector<OperatorBasePtr>> ops_of_block_;
+  // operators list
+  std::vector<OperatorBasePtr> ops_list_;

 #ifdef PADDLE_MOBILE_PROFILE
   struct ProfInfo {
     int tid = 0;
...
@@ -97,8 +88,6 @@ class Executor {
     uint64_t runEnd = 0UL;
   };
 #endif
-
-  bool use_optimize_ = false;
-  bool loddable_ = false;
 };

 }  // namespace framework
...
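The template signature also changes from Executor<Dtype, Precision P> to Executor<Device, typename T>, so precision is now expressed as the element type rather than an enum value. A sketch of how a call site changes, assuming a program object loaded elsewhere:

    // before this commit: precision as a non-type enum parameter
    Executor<CPU, Precision::FP32> old_executor(program);
    // after this commit: precision as the element type, lod_mode instead of loddable
    Executor<CPU, float> new_executor(program, 1, true, false);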
src/framework/loader.cpp
浏览文件 @
9729edac
...
@@ -23,14 +23,8 @@ limitations under the License. */
...
@@ -23,14 +23,8 @@ limitations under the License. */
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
framework
{
namespace
framework
{
/**
template
<
typename
Device
,
typename
T
>
* muteandresize tensor as originProgramDesc and scope in loadParams
void
Loader
<
Device
,
T
>::
InitMemoryFromProgram
(
*
* @param originProgramDesc
* @param scope
*/
template
<
typename
Dtype
,
Precision
P
>
void
Loader
<
Dtype
,
P
>::
InitMemoryFromProgram
(
const
std
::
shared_ptr
<
ProgramDesc
>
&
originProgramDesc
,
const
std
::
shared_ptr
<
ProgramDesc
>
&
originProgramDesc
,
const
std
::
shared_ptr
<
Scope
>
&
scope
)
{
const
std
::
shared_ptr
<
Scope
>
&
scope
)
{
for
(
const
auto
&
block
:
originProgramDesc
.
get
()
->
Blocks
())
{
for
(
const
auto
&
block
:
originProgramDesc
.
get
()
->
Blocks
())
{
...
@@ -43,8 +37,6 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
...
@@ -43,8 +37,6 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
tensor
->
Resize
(
make_ddim
(
dim
));
tensor
->
Resize
(
make_ddim
(
dim
));
}
else
{
}
else
{
auto
dim
=
var_desc
->
Tensor_desc
().
Dims
();
auto
dim
=
var_desc
->
Tensor_desc
().
Dims
();
// PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
// dim[0] = 1;
if
(
dim
.
size
()
==
0
)
{
if
(
dim
.
size
()
==
0
)
{
auto
tensor
=
var
->
GetMutable
<
LoDTensor
>
();
auto
tensor
=
var
->
GetMutable
<
LoDTensor
>
();
framework
::
DDim
dDim
=
{
0
};
framework
::
DDim
dDim
=
{
0
};
...
@@ -60,7 +52,7 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
...
@@ -60,7 +52,7 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
}
}
}
}
}
else
{
}
else
{
// TODO(codeWorm)
: some.
// TODO(codeWorm)
}
}
}
}
}
}
...
@@ -68,7 +60,7 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
...
@@ -68,7 +60,7 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
template
<
>
template
<
>
void
Loader
<
GPU_CL
,
Precision
::
FP32
>::
InitMemoryFromProgram
(
void
Loader
<
GPU_CL
,
float
>::
InitMemoryFromProgram
(
const
std
::
shared_ptr
<
ProgramDesc
>
&
originProgramDesc
,
const
std
::
shared_ptr
<
ProgramDesc
>
&
originProgramDesc
,
const
std
::
shared_ptr
<
Scope
>
&
scope
)
{
const
std
::
shared_ptr
<
Scope
>
&
scope
)
{
for
(
const
auto
&
block
:
originProgramDesc
.
get
()
->
Blocks
())
{
for
(
const
auto
&
block
:
originProgramDesc
.
get
()
->
Blocks
())
{
...
@@ -77,7 +69,6 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
...
@@ -77,7 +69,6 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
if
(
var_desc
->
Type
()
==
VARTYPE_TYPE_LOD_TENSOR
)
{
if
(
var_desc
->
Type
()
==
VARTYPE_TYPE_LOD_TENSOR
)
{
if
(
var_desc
->
Persistable
())
{
if
(
var_desc
->
Persistable
())
{
auto
dim
=
var_desc
->
Tensor_desc
().
Dims
();
auto
dim
=
var_desc
->
Tensor_desc
().
Dims
();
// auto tensor = var->GetMutable<LoDTensor>();
auto
cl_image
=
var
->
GetMutable
<
framework
::
CLImage
>
();
auto
cl_image
=
var
->
GetMutable
<
framework
::
CLImage
>
();
cl_image
->
Resize
(
make_ddim
(
dim
));
cl_image
->
Resize
(
make_ddim
(
dim
));
}
else
{
}
else
{
...
@@ -88,14 +79,13 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
...
@@ -88,14 +79,13 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
cl_image
->
Resize
(
make_ddim
(
dim
));
cl_image
->
Resize
(
make_ddim
(
dim
));
}
}
}
else
{
}
else
{
// TODO(codeWorm)
: some.
// TODO(codeWorm)
}
}
}
}
}
}
}
}
template
<
>
template
<
>
const
Program
<
GPU_CL
,
Precision
::
FP32
>
const
Program
<
GPU_CL
,
float
>
Loader
<
GPU_CL
,
float
>::
LoadCombinedMemory
(
Loader
<
GPU_CL
,
Precision
::
FP32
>::
LoadCombinedMemory
(
size_t
read_size
,
const
uint8_t
*
buf
,
size_t
combined_params_len
,
size_t
read_size
,
const
uint8_t
*
buf
,
size_t
combined_params_len
,
uint8_t
*
combined_params_buf
,
bool
optimize
,
bool
quantification
)
{
uint8_t
*
combined_params_buf
,
bool
optimize
,
bool
quantification
)
{
bool
can_add_split
=
false
;
bool
can_add_split
=
false
;
...
@@ -113,7 +103,7 @@ Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
...
@@ -113,7 +103,7 @@ Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
auto
originProgramDesc
=
std
::
make_shared
<
ProgramDesc
>
(
c_program
);
auto
originProgramDesc
=
std
::
make_shared
<
ProgramDesc
>
(
c_program
);
Program
<
GPU_CL
,
Precision
::
FP32
>
program
;
Program
<
GPU_CL
,
float
>
program
;
program
.
combined
=
true
;
program
.
combined
=
true
;
program
.
originProgram
=
originProgramDesc
;
program
.
originProgram
=
originProgramDesc
;
program
.
quantification
=
quantification
;
program
.
quantification
=
quantification
;
...
@@ -145,16 +135,16 @@ Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
...
@@ -145,16 +135,16 @@ Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
/**
/**
* fusion and print someinfos
* fusion and print someinfos
* @tparam D
typ
e
* @tparam D
evic
e
* @tparam P
* @tparam P
* @param optimize
* @param optimize
* @param can_add_split
* @param can_add_split
* @param program
* @param program
* @param originProgramDesc
* @param originProgramDesc
*/
*/
template
<
typename
D
type
,
Precision
P
>
template
<
typename
D
evice
,
typename
T
>
void
FusionAndPrintInfos
(
void
FusionAndPrintInfos
(
bool
optimize
,
bool
can_add_split
,
Program
<
D
type
,
P
>
*
program
,
bool
optimize
,
bool
can_add_split
,
Program
<
D
evice
,
T
>
*
program
,
const
std
::
shared_ptr
<
ProgramDesc
>
&
originProgramDesc
)
{
const
std
::
shared_ptr
<
ProgramDesc
>
&
originProgramDesc
)
{
if
(
optimize
)
{
if
(
optimize
)
{
ProgramOptimize
program_optimize
;
ProgramOptimize
program_optimize
;
...
@@ -193,22 +183,22 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
...
@@ -193,22 +183,22 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
return
cur_len
;
return
cur_len
;
}
}
template
<
typename
D
type
,
Precision
P
>
template
<
typename
D
evice
,
typename
T
>
const
Program
<
D
type
,
P
>
Loader
<
Dtype
,
P
>::
Load
(
const
std
::
string
&
dirname
,
const
Program
<
D
evice
,
T
>
Loader
<
Device
,
T
>::
Load
(
const
std
::
string
&
dirname
,
bool
optimize
,
bool
optimize
,
bool
quantification
,
bool
quantification
,
bool
can_add_split
)
{
bool
can_add_split
)
{
auto
program
=
this
->
LoadProgram
(
dirname
+
"/__model__"
,
optimize
,
auto
program
=
this
->
LoadProgram
(
dirname
+
"/__model__"
,
optimize
,
quantification
,
can_add_split
);
quantification
,
can_add_split
);
program
.
model_path
=
dirname
;
program
.
model_path
=
dirname
;
return
program
;
return
program
;
}
}
template
<
typename
D
type
,
Precision
P
>
template
<
typename
D
evice
,
typename
T
>
const
Program
<
D
type
,
P
>
Loader
<
Dtype
,
P
>::
Load
(
const
std
::
string
&
model_path
,
const
Program
<
D
evice
,
T
>
Loader<Device, T>::Load(const std::string &model_path,
                                                  const std::string &para_path,
                                                  bool optimize,
                                                  bool quantification) {
   auto program = this->LoadProgram(model_path, optimize, quantification);
   program.para_path = para_path;
...
@@ -217,8 +207,8 @@ const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &model_path,
   return program;
 }

-template <typename Dtype, Precision P>
-const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
+template <typename Device, typename T>
+const Program<Device, T> Loader<Device, T>::LoadProgram(
     const std::string &model_path, bool optimize, bool quantification,
     bool can_add_split) {
   std::string model_filename = model_path;
...
@@ -237,7 +227,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
   //
   auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);

-  Program<Dtype, P> program;
+  Program<Device, T> program;
   program.originProgram = originProgramDesc;
   program.quantification = quantification;
   program.combined_params_len = 0;
...
@@ -254,8 +244,8 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
   return program;
 }

-template <typename Dtype, Precision P>
-const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
+template <typename Device, typename T>
+const Program<Device, T> Loader<Device, T>::LoadCombinedMemory(
     size_t read_size, const uint8_t *buf, size_t combined_params_len,
     uint8_t *combined_params_buf, bool optimize, bool quantification) {
   bool can_add_split = false;
...
@@ -273,7 +263,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
   auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);

-  Program<Dtype, P> program;
+  Program<Device, T> program;
   program.combined = true;
   program.originProgram = originProgramDesc;
   program.quantification = quantification;
...
@@ -289,13 +279,13 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
   return program;
 }

-template class Loader<CPU, Precision::FP32>;
-template class Loader<FPGA, Precision::FP32>;
-template class Loader<GPU_MALI, Precision::FP32>;
-template class Loader<GPU_CL, Precision::FP32>;
+template class Loader<CPU, float>;
+template class Loader<FPGA, float>;
+template class Loader<GPU_MALI, float>;
+template class Loader<GPU_CL, float>;

 }  // namespace framework
 }  // namespace paddle_mobile
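The change above re-templates Loader on a device tag plus a plain element type instead of a Precision enum value. A minimal usage sketch under the new signature (the model directory path is a placeholder, not part of this diff):

    #include "framework/loader.h"

    // Sketch only: load a separate-format fluid model with the re-templated
    // Loader<Device, T>. The "./models/mobilenet" path is made up.
    paddle_mobile::framework::Program<paddle_mobile::CPU, float> LoadExample() {
      paddle_mobile::framework::Loader<paddle_mobile::CPU, float> loader;
      // optimize = true fuses ops, quantification = false keeps fp32 weights
      return loader.Load("./models/mobilenet", true, false);
    }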
src/framework/loader.h
@@ -22,39 +22,39 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {

-template <typename Dtype = CPU, Precision P = Precision::FP32>
+template <typename Device = CPU, typename T = float>
 class Loader {
  public:
   /*
    * @b load separate format fluid model
-   * @b 加载分开形式的 fluid 模型
+   * @b 加载分开存储的fluid模型
    * */
-  const Program<Dtype, P> Load(const std::string &dirname,
-                               bool optimize = false,
-                               bool quantification = false,
-                               bool can_add_split = false);
+  const Program<Device, T> Load(const std::string &dirname,
+                                bool optimize = false,
+                                bool quantification = false,
+                                bool can_add_split = false);

   /*
    * @b load combine format fluid mode
-   * @b 加载结合在一起格式的模型
+   * @b 加载统一存储的fluid模型
    * */
-  const Program<Dtype, P> Load(const std::string &model_path,
-                               const std::string &para_path,
-                               bool optimize = false,
-                               bool quantification = false);
+  const Program<Device, T> Load(const std::string &model_path,
+                                const std::string &para_path,
+                                bool optimize = false,
+                                bool quantification = false);

-  const Program<Dtype, P> LoadCombinedMemory(size_t model_len,
-                                             const uint8_t *model_buf,
-                                             size_t combined_params_len,
-                                             uint8_t *combined_params_buf,
-                                             bool optimize = false,
-                                             bool quantification = false);
+  const Program<Device, T> LoadCombinedMemory(size_t model_len,
+                                              const uint8_t *model_buf,
+                                              size_t combined_params_len,
+                                              uint8_t *combined_params_buf,
+                                              bool optimize = false,
+                                              bool quantification = false);

  private:
-  const Program<Dtype, P> LoadProgram(const std::string &model_path,
-                                      bool optimize = false,
-                                      bool quantification = false,
-                                      bool can_add_split = false);
+  const Program<Device, T> LoadProgram(const std::string &model_path,
+                                       bool optimize = false,
+                                       bool quantification = false,
+                                       bool can_add_split = false);

   void InitMemoryFromProgram(
       const std::shared_ptr<ProgramDesc> &originProgramDesc,
...
src/framework/lod_tensor.h
@@ -16,12 +16,12 @@ limitations under the License. */
 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>
-#include "tensor.h"
-#include "tensor_util.h"
+#include "framework/tensor.h"
+#include "framework/tensor_util.h"

 namespace paddle_mobile {
 namespace framework {
 /*
...
@@ -202,5 +202,29 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor);
 void DeserializeFromStream(std::istream &is, LoDTensor *tensor);

+#ifdef PADDLE_MOBILE_DEBUG
+inline Print &operator<<(Print &printer, const LoDTensor &tensor) {
+  printer << " dims: " << tensor.dims() << "\n";
+  int stride = tensor.numel() / 20;
+  stride = stride > 0 ? stride : 1;
+#ifndef PADDLE_MOBILE_FPGA
+  for (int i = 0; i < tensor.numel(); i += stride) {
+    if (tensor.type() == typeid(float)) {
+      printer << tensor.data<float>()[i] << " ";
+    } else if (tensor.type() == typeid(int32_t)) {
+      printer << tensor.data<int32_t>()[i] << " ";
+    } else if (tensor.type() == typeid(int64_t)) {
+      printer << tensor.data<int64_t>()[i] << " ";
+    } else if (tensor.type() == typeid(int8_t)) {
+      printer << static_cast<int>(tensor.data<int8_t>()[i]) << " ";
+    } else if (tensor.type() == typeid(int32_t)) {
+      printer << tensor.data<int32_t>()[i] << " ";
+    }
+  }
+#endif  // PADDLE_MOBILE_FPGA
+  return printer;
+}
+#endif  // PADDLE_MOBILE_DEBUG
+
 }  // namespace framework
 }  // namespace paddle_mobile
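The block added to lod_tensor.h mirrors the existing Tensor printer in tensor.h: in debug builds a LoDTensor can be streamed straight into the logging macros, sampling roughly 20 elements regardless of tensor size. A minimal sketch of how the tests further down use it:

    #include "framework/lod_tensor.h"

    // Sketch only: assumes a PADDLE_MOBILE_DEBUG build; DLOG is the project's
    // debug logging macro.
    void DumpTensor(const paddle_mobile::framework::LoDTensor &t) {
      DLOG << "lod tensor: " << t;  // prints dims plus a strided sample of values
    }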
src/framework/program/program.h
@@ -14,16 +14,15 @@ limitations under the License. */
 #pragma once

+#include <string>
 #include "common/types.h"
 #include "framework/program/program_desc.h"
 #include "framework/scope.h"
-#include <string>

 namespace paddle_mobile {
 namespace framework {

-template <typename Dtype, Precision P = Precision::FP32>
+template <typename Device, typename T = float>
 class Program {
  public:
   std::shared_ptr<ProgramDesc> originProgram;
...
src/framework/scope.h
@@ -26,6 +26,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {

 class Scope {
  public:
   Scope() = default;
...
src/framework/tensor.h
@@ -226,7 +226,6 @@ inline Print &operator<<(Print &printer, const Tensor &tensor) {
     }
   }
 #endif
   return printer;
 }
...
src/io/api_paddle_mobile.cc
@@ -18,17 +18,17 @@
 namespace paddle_mobile {

-template <typename Dtype, Precision P>
-PaddleMobilePredictor<Dtype, P>::PaddleMobilePredictor(
+template <typename Device, typename T>
+PaddleMobilePredictor<Device, T>::PaddleMobilePredictor(
     const PaddleMobileConfig &config) {
   PADDLE_MOBILE_ENFORCE(Init(config) == true,
                         "paddle mobile predictor init failed!");
   config_ = config;
 }

-template <typename Dtype, Precision P>
-bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
-  paddle_mobile_.reset(new PaddleMobile<Dtype, P>());
+template <typename Device, typename T>
+bool PaddleMobilePredictor<Device, T>::Init(const PaddleMobileConfig &config) {
+  paddle_mobile_.reset(new PaddleMobile<Device, T>());
 #ifdef PADDLE_MOBILE_CL
   paddle_mobile_->SetCLPath(config.cl_path);
 #endif
...
@@ -52,8 +52,8 @@ bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
   paddle_mobile_->SetThreadNum(config.thread_num);
   return true;
 }

-template <typename Dtype, Precision P>
-bool PaddleMobilePredictor<Dtype, P>::Run(
+template <typename Device, typename T>
+bool PaddleMobilePredictor<Device, T>::Run(
     const std::vector<PaddleTensor> &inputs,
     std::vector<PaddleTensor> *output_data, int batch_size) {
   if (inputs.empty()) {
...
@@ -78,12 +78,12 @@ bool PaddleMobilePredictor<Dtype, P>::Run(
   framework::Tensor input_tensor;
   input_tensor.Resize(ddim);
   int input_length = framework::product(ddim);
-  typedef typename PrecisionTrait<P>::ptype PType;
-  auto input_ptr = input_tensor.mutable_data<PType>();
+  auto input_ptr = input_tensor.mutable_data<T>();

-  memcpy(input_ptr, static_cast<PType *>(input.data.data()),
-         input_length * sizeof(PType));
-  auto output_tensor = paddle_mobile_->Predict(input_tensor);
+  memcpy(input_ptr, static_cast<T *>(input.data.data()),
+         input_length * sizeof(T));
+  paddle_mobile_->Predict(input_tensor);
+  auto output_tensor = paddle_mobile_->Fetch();

   if (output_data->empty()) {
     LOG(kLOG_ERROR) << "At least one output should be set with tensors' names.";
...
@@ -99,18 +99,18 @@ bool PaddleMobilePredictor<Dtype, P>::Run(
     output.shape.push_back(static_cast<int>(d));
   }

-  if (output.data.length() < output_length * sizeof(PType)) {
-    output.data.Resize(output_length * sizeof(PType));
+  if (output.data.length() < output_length * sizeof(T)) {
+    output.data.Resize(output_length * sizeof(T));
   }

-  memcpy(output.data.data(), output_tensor->template data<PType>(),
-         output_length * sizeof(PType));
+  memcpy(output.data.data(), output_tensor->template data<T>(),
+         output_length * sizeof(T));

   return true;
 }

-template <typename Dtype, Precision P>
-PaddleMobilePredictor<Dtype, P>::~PaddleMobilePredictor() {
+template <typename Device, typename T>
+PaddleMobilePredictor<Device, T>::~PaddleMobilePredictor() {
   paddle_mobile_->Clear();
 }
...
@@ -122,13 +122,13 @@ CreatePaddlePredictor<PaddleMobileConfig, PaddleEngineKind::kPaddleMobile>(
   std::unique_ptr<PaddlePredictor> x;
   if (config.precision == PaddleMobileConfig::FP32) {
     if (config.device == PaddleMobileConfig::kCPU) {
-      x.reset(new PaddleMobilePredictor<CPU, Precision::FP32>(config));
+      x.reset(new PaddleMobilePredictor<CPU, float>(config));
     } else if (config.device == PaddleMobileConfig::kFPGA) {
-      x.reset(new PaddleMobilePredictor<FPGA, Precision::FP32>(config));
+      x.reset(new PaddleMobilePredictor<FPGA, float>(config));
     } else if (config.device == PaddleMobileConfig::kGPU_MALI) {
-      x.reset(new PaddleMobilePredictor<GPU_MALI, Precision::FP32>(config));
+      x.reset(new PaddleMobilePredictor<GPU_MALI, float>(config));
     } else if (config.device == PaddleMobileConfig::kGPU_CL) {
-      x.reset(new PaddleMobilePredictor<GPU_CL, Precision::FP32>(config));
+      x.reset(new PaddleMobilePredictor<GPU_CL, float>(config));
     } else {
       LOG(kLOG_ERROR) << "unsupport device type!";
       return nullptr;
...
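The Run() change above is the core pattern of this commit for the C++ API wrapper: Predict() no longer returns the output tensor, it only runs the graph, and the result is read back separately with Fetch(). A minimal sketch of that two-step flow (the PaddleMobile instance and a filled input tensor are assumed to exist elsewhere):

    #include "io/paddle_mobile.h"

    // Sketch only: run once and read the first value of the default "fetch" output.
    float FirstOutputValue(paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> *pm,
                           const paddle_mobile::framework::Tensor &input) {
      pm->Predict(input);         // executes the network, returns a PMStatus
      auto output = pm->Fetch();  // std::shared_ptr<framework::LoDTensor>
      return output->data<float>()[0];
    }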
src/io/api_paddle_mobile.h
@@ -29,7 +29,7 @@ limitations under the License. */
 namespace paddle_mobile {

-template <typename Dtype = CPU, Precision P = Precision::FP32>
+template <typename Device = CPU, typename T = float>
 class PaddleMobilePredictor : public PaddlePredictor {
  public:
   PaddleMobilePredictor() = delete;
...
@@ -43,7 +43,7 @@ class PaddleMobilePredictor : public PaddlePredictor {
   ~PaddleMobilePredictor() override;

  private:
-  std::unique_ptr<PaddleMobile<Dtype, P>> paddle_mobile_;
+  std::unique_ptr<PaddleMobile<Device, T>> paddle_mobile_;
   bool Init(const PaddleMobileConfig &config);
   PaddleMobileConfig config_;
...
src/io/ios_io/PaddleMobileCPU.mm
@@ -48,7 +48,7 @@
 @interface PaddleMobileCPU ()
 {
-  paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32> *pam_;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> *pam_;
   BOOL loaded_;
 }
 @end
...
@@ -59,7 +59,7 @@ static std::mutex shared_mutex;
 - (instancetype)init {
   if (self = [super init]) {
-    pam_ = new paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32>();
+    pam_ = new paddle_mobile::PaddleMobile<paddle_mobile::CPU, float>();
   }
   return self;
 }
...
@@ -220,7 +220,8 @@ static std::mutex shared_mutex;
     memcpy(input_ptr, input,
            numel * sizeof(float));
-    std::shared_ptr<paddle_mobile::framework::Tensor> output = pam_->Predict(input_tensor);
+    pam_->Predict(input_tensor);
+    std::shared_ptr<paddle_mobile::framework::Tensor> output = pam_->Fetch();
     float *output_pointer = new float[output->numel()];
...
src/io/jni/paddle_mobile_jni.cpp
@@ -16,21 +16,23 @@ limitations under the License. */
 #include "paddle_mobile_jni.h"
 #include <cmath>
+#include <string>
+#include <vector>

 #include "common/log.h"
 #include "framework/tensor.h"
 #include "io/paddle_mobile.h"

 #ifdef ENABLE_EXCEPTION
 #include "common/enforce.h"
 #endif

 #ifdef __cplusplus
 extern "C" {
 #endif

 namespace paddle_mobile {
 namespace jni {

 using framework::DDim;
 using framework::Program;
 using framework::Tensor;
...
@@ -200,7 +202,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
     for (int i = 0; i < length; i++) {
       input_ptr[i] = dataPointer[i];
     }
-    auto output = getPaddleMobileInstance()->Predict(input);
+    getPaddleMobileInstance()->Predict(input);
+    auto output = getPaddleMobileInstance()->Fetch();
     count = output->numel();
     result = env->NewFloatArray(count);
     env->SetFloatArrayRegion(result, 0, count, output->data<float>());
...
@@ -233,7 +236,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
     for (int i = 0; i < length; i++) {
       input_ptr[i] = dataPointer[i];
     }
-    auto output = getPaddleMobileInstance()->Predict(input);
+    getPaddleMobileInstance()->Predict(input);
+    auto output = getPaddleMobileInstance()->Fetch();
     count = output->numel();
     result = env->NewFloatArray(count);
     env->SetFloatArrayRegion(result, 0, count, output->data<float>());
...
@@ -328,7 +332,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
     for (int i = 0; i < length; i++) {
       input_ptr[i] = matrix[i];
     }
-    auto output = getPaddleMobileInstance()->Predict(input);
+    getPaddleMobileInstance()->Predict(input);
+    auto output = getPaddleMobileInstance()->Fetch();
     count = output->numel();
     result = env->NewFloatArray(count);
     env->SetFloatArrayRegion(result, 0, count, output->data<float>());
...
@@ -363,7 +368,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
     for (int i = 0; i < length; i++) {
       input_ptr[i] = matrix[i];
     }
-    auto output = getPaddleMobileInstance()->Predict(input);
+    getPaddleMobileInstance()->Predict(input);
+    auto output = getPaddleMobileInstance()->Fetch();
     count = output->numel();
     result = env->NewFloatArray(count);
     env->SetFloatArrayRegion(result, 0, count, output->data<float>());
...
@@ -399,7 +405,8 @@ Java_com_baidu_paddle_PML_predictLod(JNIEnv *env, jclass thiz, jlongArray buf) {
   auto *pdata = words.mutable_data<int64_t>();
   size_t n = words.numel() * sizeof(int64_t);
   memcpy(pdata, ids.data(), n);
-  auto vec_result = paddle_mobile.PredictLod(words);
+  paddle_mobile.Predict(words);
+  auto vec_result = paddle_mobile.Fetch();
   int count = vec_result->numel();
   jlongArray result = NULL;
   ANDROIDLOGE("predict nlp size %d", count);
...
src/io/paddle_mobile.cpp
@@ -13,81 +13,81 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "io/paddle_mobile.h"
+#include <utility>
+#include "common/common.h"
 #ifdef PADDLE_MOBILE_CL
 #include <CL/cl.h>
 #include "framework/cl/cl_tensor.h"
 #endif
-#include "common/common.h"
 #include "operators/math/gemm.h"

 namespace paddle_mobile {

-template <typename Dtype, Precision P>
-void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
+template <typename Device, typename T>
+void PaddleMobile<Device, T>::SetThreadNum(int num) {
 #ifdef _OPENMP
   omp_set_num_threads(num);
 #endif
 }

-template <typename Dtype, Precision P>
-bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
-                                  bool quantification, int batch_size,
-                                  bool loddable) {
+template <typename Device, typename T>
+PMStatus PaddleMobile<Device, T>::Load(const std::string &dirname,
+                                       bool optimize, bool quantification,
+                                       int batch_size, bool loddable) {
   if (loader_.get() == nullptr) {
-    loader_ = std::make_shared<framework::Loader<Dtype, P>>();
+    loader_ = std::make_shared<framework::Loader<Device, T>>();
   } else {
     LOG(kLOG_INFO) << "loader inited";
   }
   if (executor_.get() == nullptr) {
-    executor_ = std::make_shared<framework::Executor<Dtype, P>>(
+    executor_ = std::make_shared<framework::Executor<Device, T>>(
         loader_->Load(dirname, optimize, quantification), batch_size, optimize,
         loddable);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
-  return true;
+  return PMSuccess;
 }

-template <typename Dtype, Precision P>
-bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
-                                  const std::string &para_path, bool optimize,
-                                  bool quantification, int batch_size,
-                                  bool loddable) {
+template <typename Device, typename T>
+PMStatus PaddleMobile<Device, T>::Load(const std::string &model_path,
+                                       const std::string &para_path,
+                                       bool optimize, bool quantification,
+                                       int batch_size, bool loddable) {
   if (loader_.get() == nullptr) {
-    loader_ = std::make_shared<framework::Loader<Dtype, P>>();
+    loader_ = std::make_shared<framework::Loader<Device, T>>();
   } else {
     LOG(kLOG_INFO) << "loader inited";
   }
   if (executor_.get() == nullptr) {
-    executor_ = std::make_shared<framework::Executor<Dtype, P>>(
+    executor_ = std::make_shared<framework::Executor<Device, T>>(
         loader_->Load(model_path, para_path, optimize, quantification),
         batch_size, optimize, loddable);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
-  return true;
+  return PMSuccess;
 }

-template <typename Dtype, Precision P>
-bool PaddleMobile<Dtype, P>::LoadCombinedMemory(size_t model_len,
+template <typename Device, typename T>
+bool PaddleMobile<Device, T>::LoadCombinedMemory(size_t model_len,
                                                 const uint8_t *model_buf,
                                                 size_t combined_params_len,
                                                 uint8_t *combined_params_buf) {
   int batch_size = 1;
   bool optimise = true;
   bool quantification = false;
   if (loader_.get() == nullptr) {
-    loader_ = std::make_shared<framework::Loader<Dtype, P>>();
+    loader_ = std::make_shared<framework::Loader<Device, T>>();
   } else {
     LOG(kLOG_INFO) << "loader inited";
   }
   if (executor_.get() == nullptr) {
-    executor_ = std::make_shared<framework::Executor<Dtype, P>>(
+    executor_ = std::make_shared<framework::Executor<Device, T>>(
         loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len,
                                     combined_params_buf, optimise,
                                     quantification),
...
@@ -96,38 +96,76 @@ bool PaddleMobile<Dtype, P>::LoadCombinedMemory(size_t model_len,
     LOG(kLOG_INFO) << "executor inited";
   }
-  return true;
+  return PMSuccess;
 }

+template <typename Device, typename T>
+PMStatus PaddleMobile<Device, T>::Predict(const framework::Tensor &input) {
+  std::vector<std::pair<std::string, framework::Tensor>> inputs;
+  inputs.push_back(std::make_pair("feed", input));
+  return this->Predict(inputs);
+}

-template <typename Dtype, Precision P>
-std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::Predict(
-    const framework::Tensor &t) {
-  return executor_->Predict(t);
-}
+template <typename Device, typename T>
+PMStatus PaddleMobile<Device, T>::Predict(const framework::LoDTensor &input) {
+  std::vector<std::pair<std::string, framework::LoDTensor>> inputs;
+  inputs.push_back(std::make_pair("feed", input));
+  return this->Predict(inputs);
+}

+template <typename Device, typename T>
+PMStatus PaddleMobile<Device, T>::Predict(
+    const std::vector<std::pair<std::string, framework::Tensor>> &inputs) {
+  return executor_->Predict(inputs);
+}

-template <typename Dtype, Precision P>
-std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::PredictLod(
-    const framework::LoDTensor &t) {
-  return executor_->PredictLod(t);
-}
+template <typename Device, typename T>
+PMStatus PaddleMobile<Device, T>::Predict(
+    const std::vector<std::pair<std::string, framework::LoDTensor>> &inputs) {
+  return executor_->Predict(inputs);
+}

-template <typename Dtype, Precision P>
-std::vector<typename PaddleMobile<Dtype, P>::Ptype>
-PaddleMobile<Dtype, P>::Predict(const std::vector<Ptype> &input,
-                                const std::vector<int64_t> &dims) {
+template <typename Device, typename T>
+std::vector<T> PaddleMobile<Device, T>::Predict(
+    const std::vector<T> &input, const std::vector<int64_t> &dims) {
   return executor_->Predict(input, dims);
 }

+template <typename Device, typename T>
+PMStatus PaddleMobile<Device, T>::Predict() {
+  return executor_->Predict();
+}

+template <typename Device, typename T>
+void PaddleMobile<Device, T>::Feed(const framework::Tensor &input,
+                                   const std::string &var_name) {
+  executor_->SetInput(input, var_name);
+}

+template <typename Device, typename T>
+void PaddleMobile<Device, T>::Feed(const framework::LoDTensor &input,
+                                   const std::string &var_name) {
+  executor_->SetInput(input, var_name);
+}

+typedef std::shared_ptr<framework::LoDTensor> LoDTensorPtr;
+template <typename Device, typename T>
+LoDTensorPtr PaddleMobile<Device, T>::Fetch(const std::string &var_name) {
+  return executor_->GetOutput(var_name);
+}

-template <typename Dtype, Precision P>
-void PaddleMobile<Dtype, P>::Clear() {
+template <typename Device, typename T>
+void PaddleMobile<Device, T>::Clear() {
   executor_ = nullptr;
   loader_ = nullptr;
 }

-template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetPredictTime() {}
+template <typename Device, typename T>
+double PaddleMobile<Device, T>::GetPredictTime() {}

 #ifdef PADDLE_MOBILE_CPU
 template <>
-double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() {
+double PaddleMobile<CPU, float>::GetPredictTime() {
   int m = 32;
   int n = 224 * 224;
   int k = 27;
...
@@ -148,7 +186,8 @@ double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() {
   for (int i = 0; i < k * n; ++i) {
     b[i] = t1 + rand() % t2;  // NOLINT
   }
-  paddle_mobile::operators::math::Gemm gemm;
+  operators::math::Gemm gemm;
   auto time1 = paddle_mobile::time();
   gemm.Sgemm(m, n, k, static_cast<float>(1), a, lda, b, ldb,
              static_cast<float>(0), c, ldc, false,
...
@@ -162,57 +201,51 @@ double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() {
 }
 #endif

-template <typename Dtype, Precision P>
-PaddleMobile<Dtype, P>::~PaddleMobile() {
-  executor_ = nullptr;
-  loader_ = nullptr;
-}

 #ifdef PADDLE_MOBILE_FPGA
-template <typename Dtype, Precision P>
-void PaddleMobile<Dtype, P>::InjectVariable(const framework::Tensor &t,
-                                            std::string var_name) {
+template <typename Device, T P>
+void PaddleMobile<Device, P>::InjectVariable(const framework::Tensor &t,
+                                             std::string var_name) {
   executor_->InjectVariable(t, var_name);
 }

-template <typename Dtype, Precision P>
-void PaddleMobile<Dtype, P>::FeedData(const framework::Tensor &t) {
+template <typename Device, T P>
+void PaddleMobile<Device, P>::FeedData(const framework::Tensor &t) {
   executor_->FeedData(t);
 }

-template <typename Dtype, Precision P>
-std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::FetchResult(int id) {
+template <typename Device, T P>
+std::shared_ptr<framework::Tensor> PaddleMobile<Device, P>::FetchResult(int id) {
   return executor_->FetchResult(id);
 }

-template <typename Dtype, Precision P>
-void PaddleMobile<Dtype, P>::Predict_From_To(int start, int end) {
+template <typename Device, T P>
+void PaddleMobile<Device, P>::Predict_From_To(int start, int end) {
   executor_->Predict_From_To(start, end);
 }

-template <typename Dtype, Precision P>
-void PaddleMobile<Dtype, P>::Predict_From(int start) {
+template <typename Device, T P>
+void PaddleMobile<Device, P>::Predict_From(int start) {
   executor_->Predict_From(start);
 }

-template <typename Dtype, Precision P>
-void PaddleMobile<Dtype, P>::Predict_To(int end) {
+template <typename Device, T P>
+void PaddleMobile<Device, P>::Predict_To(int end) {
   executor_->Predict_To(end);
 }
 #endif

 #ifdef PADDLE_MOBILE_CL
 static std::mutex lc;
-template <typename Dtype, Precision P>
-void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
+template <typename Device, T P>
+void PaddleMobile<Device, P>::SetCLPath(std::string path) {
   std::lock_guard<std::mutex> lock(lc);
   if (framework::CLEngine::Instance()->GetCLPath() == "") {
     framework::CLEngine::Instance()->setClPath(path);
   }
 }
 template <>
-double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
+double PaddleMobile<GPU_CL, T::FP32>::GetPredictTime() {
   cl_int status;
   cl_uint nPlatform;
   clGetPlatformIDs(0, NULL, &nPlatform);
...
@@ -410,8 +443,8 @@ double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
     return -1;
   }
 }
-template <typename Dtype, Precision P>
-int PaddleMobile<Dtype, P>::readText(
+template <typename Device, T P>
+int PaddleMobile<Device, P>::readText(
     const char *kernelPath,
    char **pcode) {  // 读取文本文件放入 pcode,返回字符串长度
   FILE *fp;
...
@@ -440,13 +473,11 @@ int PaddleMobile<Dtype, P>::readText(
   fclose(fp);
   return size + 1;
 }
 #endif

-template class PaddleMobile<CPU, Precision::FP32>;
-template class PaddleMobile<FPGA, Precision::FP32>;
-template class PaddleMobile<GPU_MALI, Precision::FP32>;
-template class PaddleMobile<GPU_CL, Precision::FP32>;
+template class PaddleMobile<CPU, float>;
+template class PaddleMobile<FPGA, float>;
+template class PaddleMobile<GPU_MALI, float>;
+template class PaddleMobile<GPU_CL, float>;

 }  // namespace paddle_mobile
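The new Predict overloads above accept a vector of (variable name, tensor) pairs, which is the "feed multi inputs" half of the commit; the single-tensor Predict is now just a wrapper that feeds the "feed" variable. A minimal sketch with two named inputs (the names "image" and "prior" are placeholders and must match the model's actual feed variables):

    #include <string>
    #include <utility>
    #include <vector>
    #include "io/paddle_mobile.h"

    // Sketch only: feed two named inputs in one Predict call.
    void RunTwoInputs(paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> *pm,
                      const paddle_mobile::framework::LoDTensor &image,
                      const paddle_mobile::framework::LoDTensor &prior) {
      std::vector<std::pair<std::string, paddle_mobile::framework::LoDTensor>> inputs;
      inputs.push_back(std::make_pair("image", image));  // placeholder variable names
      inputs.push_back(std::make_pair("prior", prior));
      pm->Predict(inputs);
    }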
src/io/paddle_mobile.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 #ifdef _OPENMP
 #include <omp.h>
...
@@ -32,43 +33,52 @@ limitations under the License. */
 namespace paddle_mobile {

-template <typename Dtype = CPU, Precision P = Precision::FP32>
+template <typename Device, typename T = float>
 class PaddleMobile {
-  typedef typename PrecisionTrait<P>::ptype Ptype;
-
  public:
   PaddleMobile() {
 #ifndef PADDLE_MOBILE_CL
-    bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Dtype>::value;
-    PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
+    bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
+    PADDLE_MOBILE_ENFORCE(!is_gpu, "Not Enable GPU in CmakeList but run gpu codes ");
 #endif
   }
+  ~PaddleMobile() {}

-  bool Load(const std::string &dirname, bool optimize = false,
-            bool quantification = false, int batch_size = 1,
-            bool loddable = false);
-  bool Load(const std::string &model_path, const std::string &para_path,
-            bool optimize = false, bool quantification = false,
-            int batch_size = 1, bool loddable = false);
+  PMStatus Load(const std::string &dirname, const bool optimize = false,
+                const bool quantification = false, const int batch_size = 1,
+                const bool lod = false);
+  PMStatus Load(const std::string &model_path, const std::string &para_path,
+                const bool optimize = false, const bool quantification = false,
+                const int batch_size = 1, const bool lod = false);

-  std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t);
-  std::shared_ptr<framework::Tensor> PredictLod(const framework::LoDTensor &t);
-  std::vector<Ptype> Predict(const std::vector<Ptype> &input,
-                             const std::vector<int64_t> &dims);
+  PMStatus Predict(const framework::Tensor &input);
+  PMStatus Predict(const framework::LoDTensor &input);
+  PMStatus Predict(
+      const std::vector<std::pair<std::string, framework::Tensor>> &inputs);
+  PMStatus Predict(
+      const std::vector<std::pair<std::string, framework::LoDTensor>> &inputs);
+  std::vector<T> Predict(const std::vector<T> &input,
+                         const std::vector<int64_t> &dims);
+  PMStatus Predict();

+  void Feed(const framework::LoDTensor &input, const std::string &var_name);
+  void Feed(const framework::Tensor &input, const std::string &var_name);

+  typedef std::shared_ptr<framework::LoDTensor> LoDTensorPtr;
+  LoDTensorPtr Fetch(const std::string &var_name);
+  LoDTensorPtr Fetch() { return Fetch("fetch"); }

   bool LoadCombinedMemory(size_t model_len, const uint8_t *model_buf,
                           size_t combined_params_len,
                           uint8_t *combined_params_buf);

-  void SetThreadNum(int num);
+  void SetThreadNum(int count);
   void Clear();
   double GetPredictTime();
-  ~PaddleMobile();

 #ifdef PADDLE_MOBILE_FPGA
   void InjectVariable(const framework::Tensor &t, std::string var_name);
   void FeedData(const framework::Tensor &t);
...
@@ -79,15 +89,15 @@ class PaddleMobile {
 #endif

 #ifdef PADDLE_MOBILE_CL
- public:
+ public:  // NOLINT
   void SetCLPath(std::string cl_path);
   int readText(const char *kernelPath,
                char **pcode);  // 读取文本文件放入 pcode,返回字符串长度
 #endif

  private:
-  std::shared_ptr<framework::Loader<Dtype, P>> loader_;
-  std::shared_ptr<framework::Executor<Dtype, P>> executor_;
+  std::shared_ptr<framework::Loader<Device, T>> loader_;
+  std::shared_ptr<framework::Executor<Device, T>> executor_;
 };

 }  // namespace paddle_mobile
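Besides the batched Predict overloads, the header now exposes Feed / Predict() / Fetch(var_name), which covers the "fetch multi outputs" half of the commit: after a single run, each output variable can be read individually. A minimal sketch (the names "score" and "bbox" are placeholders for whatever fetch variables the model actually defines):

    #include "io/paddle_mobile.h"

    // Sketch only: stage an input, run, then fetch two outputs by name.
    void MultiOutputExample(paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> *pm,
                            const paddle_mobile::framework::LoDTensor &input) {
      pm->Feed(input, "feed");          // stage the input under its feed variable name
      pm->Predict();                    // run the network on everything fed so far
      auto score = pm->Fetch("score");  // placeholder output variable names
      auto bbox = pm->Fetch("bbox");
      DLOG << "score: " << *score;      // uses the LoDTensor printer added above
      DLOG << "bbox: " << *bbox;
    }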
src/io/paddle_test_inference_api.cpp
@@ -14,10 +14,12 @@ limitations under the License. */
 #include "io/paddle_test_inference_api.h"
 #include "io/paddle_mobile.h"
 namespace paddle_mobile {
-template <typename Dtype, Precision P>
-double PaddleTester<Dtype, P>::CaculatePredictTime(std::string *cl_path) {
-  PaddleMobile<Dtype, P> paddle_mobile;
+template <typename Device, typename T>
+double PaddleTester<Device, T>::CaculatePredictTime(std::string *cl_path) {
+  PaddleMobile<Device, T> paddle_mobile;
 #ifdef PADDLE_MOBILE_CL
   if (cl_path) {
     paddle_mobile.SetCLPath(*cl_path);
...
@@ -26,10 +28,10 @@ double PaddleTester<Dtype, P>::CaculatePredictTime(std::string *cl_path) {
 #endif
   return paddle_mobile.GetPredictTime();
 }
-template class PaddleTester<CPU, Precision::FP32>;
-template class PaddleTester<FPGA, Precision::FP32>;
-template class PaddleTester<GPU_MALI, Precision::FP32>;
-template class PaddleTester<GPU_CL, Precision::FP32>;
+template class PaddleTester<CPU, float>;
+template class PaddleTester<FPGA, float>;
+template class PaddleTester<GPU_MALI, float>;
+template class PaddleTester<GPU_CL, float>;
 }  // namespace paddle_mobile
src/io/paddle_test_inference_api.h
@@ -20,10 +20,13 @@ limitations under the License. */
 */
 #pragma once
 #include "common/types.h"
 #include "string"
 namespace paddle_mobile {
-template <typename Dtype, Precision P = Precision::FP32>
+template <typename Device, typename T = float>
 class PaddleTester {
  public:
   double CaculatePredictTime(std::string *cl_path = nullptr);
...
test/CMakeLists.txt
@@ -375,5 +375,8 @@ if (NOT FOUND_MATCH)
     # gen test
     ADD_EXECUTABLE(test-super net/test_super.cpp test_helper.h test_include.h)
     target_link_libraries(test-super paddle-mobile)

     #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)

+    # gen test
+    ADD_EXECUTABLE(test-ocr net/test_ocr.cpp test_helper.h test_include.h)
+    target_link_libraries(test-ocr paddle-mobile)
 endif ()
test/executor_for_test.h
@@ -39,6 +39,7 @@ using paddle_mobile::framework::Tensor;
 using paddle_mobile::framework::Variable;
 using std::string;
 using std::vector;

 template <typename DeviceType, typename OpType>
 class Executor4Test : public Executor<DeviceType> {
  public:
...
@@ -48,20 +49,19 @@ class Executor4Test : public Executor<DeviceType> {
     this->use_optimize_ = use_optimize;
     this->program_ = p;
     if (this->use_optimize_) {
-      this->to_predict_program_ = this->program_.optimizeProgram;
+      this->program_desc_ = this->program_.optimizeProgram;
     } else {
-      this->to_predict_program_ = this->program_.originProgram;
+      this->program_desc_ = this->program_.originProgram;
     }

     if (this->program_.originProgram == nullptr) {
       LOG(paddle_mobile::LogLevel::kLOG_ERROR)
-          << "to_predict_program_ == nullptr";
+          << "program_desc_ == nullptr";
     }

     const std::vector<std::shared_ptr<BlockDesc>> blocks =
-        this->to_predict_program_->Blocks();
-    for (std::shared_ptr<BlockDesc> block_desc : blocks) {
-      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+        this->program_desc_->Blocks();
+    for (int block_id = 0; block_id < blocks.size(); ++block_id) {
+      std::vector<std::shared_ptr<OpDesc>> ops = blocks[block_id]->Ops();
       for (int i = 0; i < ops.size(); ++i) {
         auto op = ops[i];
         if (op->Type() == op_type) {
...
@@ -73,18 +73,16 @@ class Executor4Test : public Executor<DeviceType> {
               paddle_mobile::framework::OpRegistry<DeviceType>::CreateOp(
                   op->Type(), op->GetInputs(), op->GetOutputs(),
                   op->GetAttrMap(), this->program_.scope);
-          this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
+          this->ops_of_block_[block_id].push_back(op_ptr);
           break;
         }
       }
     }
     this->InitMemory();
-    std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block =
-        this->to_predict_program_->Block(0);
-    auto &ops = this->ops_of_block_[*to_predict_block.get()];
-    for (const auto &op : ops) {
-      op->Init();
+    for (const auto &ops : this->ops_of_block_) {
+      for (const auto &op : ops) {
+        op->Init();
+      }
     }
   }
...
@@ -117,12 +115,10 @@ class Executor4Test : public Executor<DeviceType> {
       output_tensor_sptrs[i].reset(output_tensors[i]);
     }

-    std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block =
-        this->to_predict_program_->Block(0);
-    for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size();
-         ++j) {
-      auto op = this->ops_of_block_[*to_predict_block.get()][j];
-      op->Run();
+    for (auto &ops : this->ops_of_block_) {
+      for (auto &op : ops) {
+        op->Run();
+      }
     }

     return output_tensor_sptrs;
...
@@ -139,14 +135,11 @@ class Executor4Test : public Executor<DeviceType> {
     auto *output_tensor = con_output->GetMutable<LoDTensor>();
     output_tensor->mutable_data<float>(dDim);

-    std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block =
-        this->to_predict_program_->Block(0);
-    for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size();
-         ++j) {
-      auto op = this->ops_of_block_[*to_predict_block.get()][j];
-      op->Run();
+    for (auto &ops : this->ops_of_block_) {
+      for (auto &op : ops) {
+        op->Run();
+      }
     }

     return std::make_shared<paddle_mobile::framework::Tensor>(
         paddle_mobile::framework::Tensor(*output_tensor));
   }
...
test/net/test_benchmark.cpp
@@ -52,15 +52,16 @@ int main(int argc, char* argv[]) {
   SetupTensor<float>(&input, in_shape, 0.f, 255.f);
   // warmup
   for (int i = 0; i < 10; ++i) {
-    output = paddle_mobile.Predict(input);
+    paddle_mobile.Predict(input);
   }
   auto time3 = time();
   for (int i = 0; i < 10; ++i) {
-    output = paddle_mobile.Predict(input);
+    paddle_mobile.Predict(input);
   }
   auto time4 = time();
   std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms\n";
   std::ostringstream os("output tensor size: ");
+  output = paddle_mobile.Fetch();
   os << output->numel() << "\n" << output->data<float>()[0];
   for (int i = 1; i < output->numel(); ++i) {
     os << ", " << output->data<float>()[i];
...
test/net/test_eng.cpp
@@ -36,11 +36,11 @@ int main() {
                            input_tensor.data<float>() + input_tensor.numel());
   // 预热十次
   for (int i = 0; i < 1; ++i) {
-    paddle_mobile.PredictLod(input_tensor);
+    paddle_mobile.Predict(input_tensor);
   }
   auto time3 = time();
   for (int i = 0; i < 1; ++i) {
-    paddle_mobile.PredictLod(input_tensor);
+    paddle_mobile.Predict(input_tensor);
   }
   auto time4 = time();
   std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
...
test/net/test_googlenet.cpp
@@ -41,12 +41,12 @@ int main(int argc, char* argv[]) {
 #endif
   paddle_mobile.SetThreadNum(thread_num);
   auto time1 = time();
-  if (paddle_mobile.Load(g_googlenet, optimize)) {
-    std::vector<float> output;
+  if (paddle_mobile.Load(g_googlenet, optimize, false, 1, true)) {
     auto time2 = paddle_mobile::time();
     std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
               << std::endl;
     std::vector<float> input;
+    std::vector<float> output;
     std::vector<int64_t> dims{1, 3, 224, 224};
     if (feed_shape) {
       sscanf(feed_shape, "%d,%d,%d", &dims[1], &dims[2], &dims[3]);
...
test/net/test_nlp.cpp
@@ -48,8 +48,8 @@ int main() {
   DLOG << "words lod 22: " << words.lod();
   auto time3 = time();
   for (int i = 0; i < 1; ++i) {
-    auto vec_result = paddle_mobile.PredictLod(words);
-    DLOG << *vec_result;
+    paddle_mobile.Predict(words);
+    DLOG << *paddle_mobile.Fetch();
   }
   auto time4 = time();
   std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms"
...
@@ -84,8 +84,8 @@ int main() {
   DLOG << "words lod 22: " << words.lod();
   auto time3 = time();
   for (int i = 0; i < 1; ++i) {
-    auto vec_result = paddle_mobile.PredictLod(words);
-    DLOG << *vec_result;
+    paddle_mobile.Predict(words);
+    DLOG << *paddle_mobile.Fetch();
   }
   auto time4 = time();
   std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms"
...
test/net/test_ocr.cpp
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <fstream>
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"

void load_images(const char *image_dir, const char *images_list,
                 std::vector<std::string> *image_names,
                 std::vector<std::pair<int, int>> *image_shapes) {
  int height, width;
  std::string filename;
  std::ifstream if_list(images_list, std::ios::in);
  while (!if_list.eof()) {
    if_list >> height >> width >> filename;
    image_shapes->push_back(std::make_pair(height, width));
    image_names->push_back(filename);
  }
}

int main(int argc, char **argv) {
  if (argc < 4) {
    std::cerr << "Usage: ./test_ocr model_dir image_dir images_list."
              << std::endl;
    return 1;
  }
  char *model_dir = argv[1];
  char *image_dir = argv[2];
  char *images_list = argv[3];

  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  paddle_mobile.SetThreadNum(8);
  auto isok = paddle_mobile.Load(std::string(model_dir) + "/model",
                                 std::string(model_dir) + "/params", true,
                                 false, 1, true);
  DLOG << "pass init model";
  std::vector<std::string> image_names;
  std::vector<std::pair<int, int>> image_shapes;
  load_images(image_dir, images_list, &image_names, &image_shapes);
  DLOG << "pass load images";

  for (int i = 0; i < image_names.size(); i++) {
    std::string file_name = image_names[i];
    std::vector<float> input;
    std::vector<int64_t> dims{1, 1, 48, 512};
    dims[2] = image_shapes[i].first;
    dims[3] = image_shapes[i].second;
    // load input image
    std::string img_path = std::string(image_dir) + "/" + file_name;
    std::cerr << "img_path: " << img_path << std::endl;
    std::cerr << "shape = [" << dims[0] << ", " << dims[1] << ", " << dims[2]
              << ", " << dims[3] << "]" << std::endl;
    GetInput<float>(img_path, &input, dims);
    // predict
    auto output = paddle_mobile.Predict(input, dims);
    // print result
    std::cerr << file_name << std::endl;
    std::cerr << output[0];
    for (int j = 1; j < output.size(); ++j) {
      std::cerr << " " << output[j];
    }
    std::cerr << std::endl;
  }
  return 0;
}
tools/pre-commit.hooks/cpplint.hook
@@ -5,7 +5,7 @@ TOTAL_ERRORS=0
 # The trick to remove deleted files: https://stackoverflow.com/a/2413151
 for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \
               grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \
-              grep -v "protobuf-c.h" | grep -v "protobuf-c.c"); do
+              grep -v "protobuf-c.h" | grep -v "protobuf-c.c" | grep -v "paddle_mobile_jni.cpp"); do
     cpplint $file;
     TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
 done
...