Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
382e2ec9
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
382e2ec9
编写于
10月 07, 2018
作者:
陈
陈后江
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine
上级
d52a8d01
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
137 addition
and
126 deletion
+137
-126
src/common/util.cpp
src/common/util.cpp
+31
-0
src/common/util.h
src/common/util.h
+24
-0
src/framework/program/program.h
src/framework/program/program.h
+0
-2
src/io/executor.cpp
src/io/executor.cpp
+58
-96
src/io/executor.h
src/io/executor.h
+24
-20
src/io/loader.h
src/io/loader.h
+0
-8
未找到文件。
src/common/util.cpp
0 → 100644
浏览文件 @
382e2ec9
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "common/util.h"

#include <memory>
namespace paddle_mobile {

// Reads the entire file at `filename` (opened in binary mode) into a freshly
// allocated buffer and returns it.
//
// The definition lives in namespace paddle_mobile so that it matches the
// declaration in common/util.h; defining it at global scope would leave
// paddle_mobile::ReadFileToBuff declared but never defined.
//
// @param filename  path of the file to read.
// @return buffer allocated with new char[] holding exactly the file's bytes.
//         No terminator is appended and the length is not reported. The
//         caller owns the buffer and must release it with delete[].
//
// Failures (file cannot be opened, reported size is not positive, short read)
// are reported through PADDLE_MOBILE_ENFORCE with the same messages as before.
char *ReadFileToBuff(std::string filename) {
  // Owning handle: fclose runs on every exit path, including one where an
  // enforce check below aborts the function early. A null handle is never
  // passed to fclose.
  std::unique_ptr<FILE, decltype(&fclose)> file(fopen(filename.c_str(), "rb"),
                                                &fclose);
  PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
                        filename.c_str());

  // Determine the file size by seeking to the end, then go back to the start.
  fseek(file.get(), 0, SEEK_END);
  const int64_t size = ftell(file.get());
  PADDLE_MOBILE_ENFORCE(size > 0, "file should not be empty");
  rewind(file.get());

  // Hold the buffer in a unique_ptr until the read is verified so that it is
  // not leaked if the check below aborts the function.
  std::unique_ptr<char[]> data(new char[size]);
  const size_t bytes_read = fread(data.get(), 1, size, file.get());
  // size is known to be positive here, so the cast to size_t is lossless and
  // avoids a signed/unsigned comparison.
  PADDLE_MOBILE_ENFORCE(bytes_read == static_cast<size_t>(size),
                        "read binary file bytes do not match with fseek");

  // Hand ownership of the buffer to the caller.
  return data.release();
}

}  // namespace paddle_mobile
src/common/util.h
0 → 100644
浏览文件 @
382e2ec9
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "common/enforce.h"
#include <string>
// Shared utility declarations for paddle_mobile.
namespace
paddle_mobile
{
// Reads the file at `filename` and returns its contents in a heap buffer.
// Per the definition shown in common/util.cpp: the file is opened in binary
// mode, the buffer is allocated with new char[] and sized to the file, and
// only the pointer is returned (no length, no terminator). Callers in
// io/executor.cpp release the result with delete[].
// NOTE(review): the definition shown in common/util.cpp is not wrapped in
// namespace paddle_mobile — confirm that it matches this declaration.
char
*
ReadFileToBuff
(
std
::
string
filename
);
}
// namespace paddle_mobile
src/framework/program/program.h
浏览文件 @
382e2ec9
...
@@ -33,8 +33,6 @@ class Program {
...
@@ -33,8 +33,6 @@ class Program {
bool
quantification
=
false
;
bool
quantification
=
false
;
size_t
combined_params_len
;
size_t
combined_params_len
;
const
uint8_t
*
combined_params_buf
;
const
uint8_t
*
combined_params_buf
;
private:
};
};
}
// namespace framework
}
// namespace framework
...
...
src/io/executor.cpp
浏览文件 @
382e2ec9
...
@@ -32,34 +32,15 @@ namespace paddle_mobile {
...
@@ -32,34 +32,15 @@ namespace paddle_mobile {
using
framework
::
Variable
;
using
framework
::
Variable
;
char
*
Get_binary_data
(
std
::
string
filename
)
{
FILE
*
file
=
fopen
(
filename
.
c_str
(),
"rb"
);
PADDLE_MOBILE_ENFORCE
(
file
!=
nullptr
,
"can't open file: %s "
,
filename
.
c_str
());
fseek
(
file
,
0
,
SEEK_END
);
int64_t
size
=
ftell
(
file
);
PADDLE_MOBILE_ENFORCE
(
size
>
0
,
"size is too small"
);
rewind
(
file
);
char
*
data
=
new
char
[
size
];
size_t
bytes_read
=
fread
(
data
,
1
,
size
,
file
);
PADDLE_MOBILE_ENFORCE
(
bytes_read
==
size
,
"read binary file bytes do not match with fseek"
);
fclose
(
file
);
return
data
;
}
template
<
typename
Dtype
,
Precision
P
>
template
<
typename
Dtype
,
Precision
P
>
Executor
<
Dtype
,
P
>::
Executor
(
const
framework
::
Program
<
Dtype
>
p
,
Executor
<
Dtype
,
P
>::
Executor
(
const
framework
::
Program
<
Dtype
>
p
,
const
bool
use_optimize
,
const
bool
use_optimize
,
const
bool
loddable
)
const
bool
loddable
)
:
program_
(
p
),
use_optimize_
(
use_optimize
),
loddable_
(
loddable
)
{
:
program_
(
p
),
use_optimize_
(
use_optimize
),
loddable_
(
loddable
)
{
if
(
use_optimize_
)
{
to_predict_program_
=
program_
.
optimizeProgram
;
}
else
{
to_predict_program_
=
program_
.
originProgram
;
}
Variable
*
variable_ptr
=
program_
.
scope
->
Var
(
"batch_size"
);
Variable
*
variable_ptr
=
program_
.
scope
->
Var
(
"batch_size"
);
variable_ptr
->
SetValue
<
int
>
(
1
);
variable_ptr
->
SetValue
<
int
>
(
1
);
to_predict_program_
=
use_optimize_
?
program_
.
optimizeProgram
:
program_
.
originProgram
;
PADDLE_MOBILE_ENFORCE
(
to_predict_program_
!=
nullptr
,
PADDLE_MOBILE_ENFORCE
(
to_predict_program_
!=
nullptr
,
"to_predict_program_ == NULL!"
);
"to_predict_program_ == NULL!"
);
const
std
::
vector
<
std
::
shared_ptr
<
framework
::
BlockDesc
>>
&
blocks
=
const
std
::
vector
<
std
::
shared_ptr
<
framework
::
BlockDesc
>>
&
blocks
=
...
@@ -75,8 +56,8 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
...
@@ -75,8 +56,8 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
auto
op_base
=
framework
::
OpRegistry
<
Dtype
>::
CreateOp
(
auto
op_base
=
framework
::
OpRegistry
<
Dtype
>::
CreateOp
(
op
->
Type
(),
op
->
GetInputs
(),
op
->
GetOutputs
(),
op
->
GetAttrMap
(),
op
->
Type
(),
op
->
GetInputs
(),
op
->
GetOutputs
(),
op
->
GetAttrMap
(),
program_
.
scope
);
program_
.
scope
);
// use pre_infershape to pre resize , but if u use an lod mode tensor u
// infer shape to reshape tensor before predict,
// need to resize in runtime
// but for lod tensor, it will need to reshape in runtime
if
(
!
loddable_
)
{
if
(
!
loddable_
)
{
op_base
->
InferShape
();
op_base
->
InferShape
();
}
}
...
@@ -96,75 +77,74 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
...
@@ -96,75 +77,74 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
}
}
}
}
// should use istream to keep offset for data
template
<
typename
Dtype
>
template
<
typename
Dtype
>
void
LoadMemInternal
(
const
void
*
data
,
framework
::
LoDTensor
*
tensor
)
{
void
LoadMemInternal
(
void
*
*
data
,
framework
::
LoDTensor
*
tensor
)
{
c
onst
char
*
data_buf
=
static_cast
<
const
char
*>
(
data
);
c
har
**
data_buf
=
reinterpret_cast
<
char
*
*>
(
data
);
int64_t
size
=
tensor
->
numel
();
int64_t
size
=
tensor
->
numel
();
Dtype
*
tensor_data
=
tensor
->
mutable_data
<
Dtype
>
();
Dtype
*
tensor_data
=
tensor
->
mutable_data
<
Dtype
>
();
// stored as low precision, but compute with float
// TODO(hjchen2) must consider signed and unsigned
if
(
0
)
{
if
(
0
)
{
// TODO should be moved into operator init function
float
min_value
;
float
min_value
;
float
max_value
;
float
max_value
;
memcpy
(
&
min_value
,
data_buf
,
sizeof
(
float
));
memcpy
(
&
min_value
,
data_buf
,
sizeof
(
float
));
memcpy
(
&
max_value
,
data_buf
+
sizeof
(
float
),
sizeof
(
float
));
memcpy
(
&
max_value
,
data_buf
+
sizeof
(
float
),
sizeof
(
float
));
data_buf
+=
2
*
sizeof
(
float
);
data_buf
+=
2
*
sizeof
(
float
);
const
float
factor
=
(
max_value
-
min_value
)
/
255.0
;
const
float
factor
=
(
max_value
-
min_value
)
/
255.0
;
const
uint8_t
*
uint8_data
=
reinterpret_cast
<
const
uint8_t
*>
(
data_buf
);
const
uint8_t
*
uint8_data
=
reinterpret_cast
<
uint8_t
*>
(
data_buf
);
for
(
int
k
=
0
;
k
<
size
;
++
k
)
{
for
(
int
k
=
0
;
k
<
size
;
++
k
)
{
tensor_data
[
k
]
=
uint8_data
[
k
]
*
factor
+
min_value
;
tensor_data
[
k
]
=
uint8_data
[
k
]
*
factor
+
min_value
;
}
}
data_buf
+=
size
*
sizeof
(
uint8_t
);
data_buf
+=
size
*
sizeof
(
uint8_t
);
}
else
{
}
else
{
memcpy
(
tensor_data
,
data_buf
,
size
*
sizeof
(
Dtype
));
memcpy
(
tensor_data
,
*
data_buf
,
size
*
sizeof
(
Dtype
));
data_buf
+=
size
*
sizeof
(
Dtype
);
*
data_buf
+=
size
*
sizeof
(
Dtype
);
}
}
}
}
template
<
typename
Dtype
,
Precision
P
>
template
<
typename
Dtype
,
Precision
P
>
void
Executor
<
Dtype
,
P
>::
LoadMemory
(
const
void
*
data
,
void
Executor
<
Dtype
,
P
>::
LoadMemory
(
const
framework
::
VarDesc
var_desc
,
void
**
data
,
framework
::
LoDTensor
*
tensor
)
{
const
std
::
shared_ptr
<
framework
::
VarDesc
>
var_desc
,
const
char
*
data_buf
=
static_cast
<
const
char
*>
(
data
);
framework
::
LoDTensor
*
tensor
)
{
char
**
data_buf
=
reinterpret_cast
<
char
**>
(
data
);
// version
// version
uint32_t
version
=
*
(
reinterpret_cast
<
const
uint32_t
*>
(
data_buf
));
uint32_t
version
=
*
(
reinterpret_cast
<
uint32_t
*>
(
*
data_buf
));
data_buf
+=
sizeof
(
uint32_t
);
*
data_buf
+=
sizeof
(
uint32_t
);
// lod information
// lod information
uint64_t
lod_level
=
*
(
reinterpret_cast
<
const
uint64_t
*>
(
data_buf
));
uint64_t
lod_level
=
*
(
reinterpret_cast
<
uint64_t
*>
(
*
data_buf
));
data_buf
+=
sizeof
(
uint64_t
);
*
data_buf
+=
sizeof
(
uint64_t
);
auto
*
lod
=
tensor
->
mutable_lod
();
auto
*
lod
=
tensor
->
mutable_lod
();
lod
->
resize
(
lod_level
);
lod
->
resize
(
lod_level
);
for
(
uint64_t
i
=
0
;
i
<
lod_level
;
++
i
)
{
for
(
uint64_t
i
=
0
;
i
<
lod_level
;
++
i
)
{
uint64_t
size
=
*
(
reinterpret_cast
<
const
uint64_t
*>
(
data_buf
));
uint64_t
size
=
*
(
reinterpret_cast
<
uint64_t
*>
(
*
data_buf
));
data_buf
+=
sizeof
(
uint64_t
);
*
data_buf
+=
sizeof
(
uint64_t
);
std
::
vector
<
size_t
>
tmp_dim
(
size
/
sizeof
(
size_t
));
std
::
vector
<
size_t
>
tmp_dim
(
size
/
sizeof
(
size_t
));
memcpy
(
tmp_dim
.
data
(),
data_buf
,
size
);
memcpy
(
tmp_dim
.
data
(),
*
data_buf
,
size
);
(
*
lod
)[
i
]
=
std
::
move
(
tmp_dim
);
(
*
lod
)[
i
]
=
std
::
move
(
tmp_dim
);
data_buf
+=
size
;
*
data_buf
+=
size
;
}
}
// tensor version
// tensor version
uint32_t
tensor_version
=
*
(
reinterpret_cast
<
const
uint32_t
*>
(
data_buf
));
uint32_t
tensor_version
=
*
(
reinterpret_cast
<
uint32_t
*>
(
*
data_buf
));
data_buf
+=
sizeof
(
uint32_t
);
*
data_buf
+=
sizeof
(
uint32_t
);
// tensor desc size
// tensor desc size
int32_t
tensor_desc_size
=
*
(
reinterpret_cast
<
const
int32_t
*>
(
data_buf
));
int32_t
tensor_desc_size
=
*
(
reinterpret_cast
<
int32_t
*>
(
*
data_buf
));
data_buf
+=
sizeof
(
int32_t
);
*
data_buf
+=
sizeof
(
int32_t
);
// skip tensor desc
// skip tensor desc
data_buf
+=
tensor_desc_size
;
*
data_buf
+=
tensor_desc_size
;
const
framework
::
TensorDesc
&
tensor_desc
=
var_desc
.
Tensor_desc
();
const
framework
::
TensorDesc
&
tensor_desc
=
var_desc
->
Tensor_desc
();
tensor
->
Resize
(
framework
::
make_ddim
(
tensor_desc
.
Dims
()));
tensor
->
Resize
(
framework
::
make_ddim
(
tensor_desc
.
Dims
()));
// parse tensor from stream
// parse tensor from stream
switch
(
tensor_desc
.
DataType
())
{
switch
(
tensor_desc
.
DataType
())
{
case
framework
::
VARTYPE_TYPE_FP32
:
case
framework
::
VARTYPE_TYPE_FP32
:
LoadMemInternal
<
float
>
(
data_buf
,
tensor
);
LoadMemInternal
<
float
>
(
(
void
**
)
data_buf
,
tensor
);
break
;
break
;
case
framework
::
VARTYPE_TYPE_INT8
:
case
framework
::
VARTYPE_TYPE_INT8
:
LoadMemInternal
<
int8_t
>
(
data_buf
,
tensor
);
LoadMemInternal
<
int8_t
>
(
(
void
**
)
data_buf
,
tensor
);
break
;
break
;
case
framework
::
VARTYPE_TYPE_INT32
:
case
framework
::
VARTYPE_TYPE_INT32
:
LoadMemInternal
<
int
>
(
data_buf
,
tensor
);
LoadMemInternal
<
int
>
(
(
void
**
)
data_buf
,
tensor
);
break
;
break
;
default:
default:
LOG
(
kLOG_ERROR
)
<<
"data type is not supported"
;
LOG
(
kLOG_ERROR
)
<<
"data type is not supported"
;
...
@@ -181,11 +161,10 @@ void Executor<Dtype, P>::InitMemory() {
...
@@ -181,11 +161,10 @@ void Executor<Dtype, P>::InitMemory() {
if
(
var_desc
->
Name
()
==
"feed"
||
var_desc
->
Name
()
==
"fetch"
)
{
if
(
var_desc
->
Name
()
==
"feed"
||
var_desc
->
Name
()
==
"fetch"
)
{
continue
;
continue
;
}
}
char
*
origin_data
=
char
*
data
=
Get_binary_data
(
program_
.
model_path
+
"/"
+
var_desc
->
Name
());
ReadFileToBuff
(
program_
.
model_path
+
"/"
+
var_desc
->
Name
());
char
*
data
=
origin_data
;
LoadMemory
((
void
**
)
&
data
,
var_desc
,
tensor
);
LoadMemory
(
data
,
*
var_desc
,
tensor
);
delete
[]
data
;
delete
[]
origin_data
;
}
else
{
}
else
{
if
(
var_desc
->
Type
()
==
framework
::
VARTYPE_TYPE_LOD_TENSOR
)
{
if
(
var_desc
->
Type
()
==
framework
::
VARTYPE_TYPE_LOD_TENSOR
)
{
varInputMemory
(
var_desc
,
var
,
tensor
);
varInputMemory
(
var_desc
,
var
,
tensor
);
...
@@ -197,16 +176,15 @@ void Executor<Dtype, P>::InitMemory() {
...
@@ -197,16 +176,15 @@ void Executor<Dtype, P>::InitMemory() {
template
<
typename
Dtype
,
Precision
P
>
template
<
typename
Dtype
,
Precision
P
>
void
Executor
<
Dtype
,
P
>::
InitCombineMemory
()
{
void
Executor
<
Dtype
,
P
>::
InitCombineMemory
()
{
char
*
origin_data
;
char
*
data
=
nullptr
;
bool
self_alloc
=
false
;
if
(
program_
.
combined_params_buf
&&
program_
.
combined_params_len
)
{
if
(
program_
.
combined_params_buf
&&
program_
.
combined_params_len
)
{
LOG
(
kLOG_INFO
)
<<
"use outter memory"
;
data
=
(
char
*
)
program_
.
combined_params_buf
;
origin_data
=
(
char
*
)
program_
.
combined_params_buf
;
}
else
{
}
else
{
LOG
(
kLOG_INFO
)
<<
" begin init combine memory"
;
self_alloc
=
true
;
origin_data
=
Get_binary_data
(
program_
.
para_path
);
data
=
ReadFileToBuff
(
program_
.
para_path
);
}
}
PADDLE_MOBILE_ENFORCE
(
origin_data
!=
nullptr
,
"origin_data==nullptr!!!"
);
PADDLE_MOBILE_ENFORCE
(
data
!=
nullptr
,
"data == nullptr"
);
char
*
data
=
origin_data
;
for
(
const
auto
&
block
:
to_predict_program_
->
Blocks
())
{
for
(
const
auto
&
block
:
to_predict_program_
->
Blocks
())
{
for
(
const
auto
&
var_desc
:
block
->
Vars
())
{
for
(
const
auto
&
var_desc
:
block
->
Vars
())
{
auto
var
=
program_
.
scope
->
Var
(
var_desc
->
Name
());
auto
var
=
program_
.
scope
->
Var
(
var_desc
->
Name
());
...
@@ -215,7 +193,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
...
@@ -215,7 +193,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
if
(
var_desc
->
Name
()
==
"feed"
||
var_desc
->
Name
()
==
"fetch"
)
{
if
(
var_desc
->
Name
()
==
"feed"
||
var_desc
->
Name
()
==
"fetch"
)
{
continue
;
continue
;
}
}
LoadMemory
(
data
,
*
var_desc
,
tensor
);
LoadMemory
(
(
void
**
)
&
data
,
var_desc
,
tensor
);
}
else
{
}
else
{
if
(
var_desc
->
Type
()
==
framework
::
VARTYPE_TYPE_LOD_TENSOR
)
{
if
(
var_desc
->
Type
()
==
framework
::
VARTYPE_TYPE_LOD_TENSOR
)
{
varInputMemory
(
var_desc
,
var
,
tensor
);
varInputMemory
(
var_desc
,
var
,
tensor
);
...
@@ -223,9 +201,10 @@ void Executor<Dtype, P>::InitCombineMemory() {
...
@@ -223,9 +201,10 @@ void Executor<Dtype, P>::InitCombineMemory() {
}
}
}
}
}
}
if
(
self_alloc
)
{
delete
[]
origin_data
;
delete
[]
data
;
LOG
(
kLOG_INFO
)
<<
" end init combine memory "
;
}
LOG
(
kLOG_INFO
)
<<
"init combine memory finish"
;
}
}
template
<
typename
Dtype
,
Precision
P
>
template
<
typename
Dtype
,
Precision
P
>
...
@@ -233,33 +212,27 @@ bool Executor<Dtype, P>::varInputMemory(
...
@@ -233,33 +212,27 @@ bool Executor<Dtype, P>::varInputMemory(
const
std
::
shared_ptr
<
framework
::
VarDesc
>
&
var_desc
,
Variable
*
var
,
const
std
::
shared_ptr
<
framework
::
VarDesc
>
&
var_desc
,
Variable
*
var
,
framework
::
LoDTensor
*
tensor
)
const
{
framework
::
LoDTensor
*
tensor
)
const
{
auto
type
=
var_desc
->
Tensor_desc
().
DataType
();
auto
type
=
var_desc
->
Tensor_desc
().
DataType
();
bool
is_mute_match
=
(
type
==
framework
::
VARTYPE_TYPE_FP32
)
||
(
type
==
framework
::
VARTYPE_TYPE_INT8
)
||
(
type
==
framework
::
VARTYPE_TYPE_INT32
)
||
(
type
==
framework
::
VARTYPE_TYPE_INT64
);
PADDLE_MOBILE_ENFORCE
(
is_mute_match
,
"got unhandled data type : %d"
,
type
);
switch
(
type
)
{
switch
(
type
)
{
case
framework
::
VARTYPE_TYPE_FP32
:
{
case
framework
::
VARTYPE_TYPE_FP32
:
tensor
->
mutable_data
<
float
>
();
tensor
->
mutable_data
<
float
>
();
break
;
break
;
}
case
framework
::
VARTYPE_TYPE_INT8
:
case
framework
::
VARTYPE_TYPE_INT8
:
{
tensor
->
mutable_data
<
int8_t
>
();
tensor
->
mutable_data
<
int8_t
>
();
break
;
break
;
}
case
framework
::
VARTYPE_TYPE_INT32
:
case
framework
::
VARTYPE_TYPE_INT32
:
{
tensor
->
mutable_data
<
int32_t
>
();
tensor
->
mutable_data
<
int32_t
>
();
break
;
break
;
}
case
framework
::
VARTYPE_TYPE_INT64
:
case
framework
::
VARTYPE_TYPE_INT64
:
{
tensor
->
mutable_data
<
int64_t
>
();
tensor
->
mutable_data
<
int64_t
>
();
break
;
break
;
}
default:
default:
{
break
;
break
;
}
}
}
bool
is_mute_match
=
(
type
==
framework
::
VARTYPE_TYPE_FP32
)
||
(
type
==
framework
::
VARTYPE_TYPE_INT8
)
||
(
type
==
framework
::
VARTYPE_TYPE_INT32
)
||
(
type
==
framework
::
VARTYPE_TYPE_INT64
);
PADDLE_MOBILE_ENFORCE
(
is_mute_match
,
"got unhandled data type : %d"
,
type
);
return
is_mute_match
;
return
is_mute_match
;
}
}
...
@@ -299,17 +272,12 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
...
@@ -299,17 +272,12 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
framework
::
GetVarValue
<
framework
::
LoDTensor
>
(
out_keys
[
0
],
output_map
,
framework
::
GetVarValue
<
framework
::
LoDTensor
>
(
out_keys
[
0
],
output_map
,
*
(
program_
.
scope
));
*
(
program_
.
scope
));
#ifdef PADDLE_MOBILE_PROFILE
#ifdef PADDLE_MOBILE_PROFILE
// FILE *pf = fopen("profile.out", "w");
std
::
unordered_map
<
std
::
string
,
uint64_t
>
_tp
;
std
::
unordered_map
<
std
::
string
,
uint64_t
>
_tp
;
for
(
int
i
=
0
;
i
<
profile
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
profile
.
size
();
i
++
)
{
const
auto
&
pInfo
=
profile
[
i
];
const
auto
&
pInfo
=
profile
[
i
];
uint64_t
timeCost
=
pInfo
.
runEnd
-
pInfo
.
runBegin
;
uint64_t
timeCost
=
pInfo
.
runEnd
-
pInfo
.
runBegin
;
_tp
[
ops
[
i
]
->
Type
()]
+=
timeCost
;
_tp
[
ops
[
i
]
->
Type
()]
+=
timeCost
;
// fprintf(pf, "%d\t%s\t%d\t%llu\t%llu\t%llu\n", i,
// ops[i]->Type().c_str(),
// pInfo.tid, pInfo.runBegin, pInfo.runEnd, timeCost);
}
}
// fclose(pf);
printf
(
"====================[ profile ]======================
\n
"
);
printf
(
"====================[ profile ]======================
\n
"
);
using
prof_t
=
std
::
pair
<
std
::
string
,
uint64_t
>
;
using
prof_t
=
std
::
pair
<
std
::
string
,
uint64_t
>
;
std
::
vector
<
prof_t
>
_tv
(
_tp
.
begin
(),
_tp
.
end
());
std
::
vector
<
prof_t
>
_tv
(
_tp
.
begin
(),
_tp
.
end
());
...
@@ -359,7 +327,6 @@ std::shared_ptr<framework::LoDTensor> Executor<Dtype, P>::PredictLod(
...
@@ -359,7 +327,6 @@ std::shared_ptr<framework::LoDTensor> Executor<Dtype, P>::PredictLod(
if
(
loddable_
)
{
if
(
loddable_
)
{
ops
[
i
]
->
InferShape
();
ops
[
i
]
->
InferShape
();
}
}
// to Run
ops
[
i
]
->
Run
();
ops
[
i
]
->
Run
();
#ifdef PADDLE_MOBILE_PROFILE
#ifdef PADDLE_MOBILE_PROFILE
clock_gettime
(
CLOCK_MONOTONIC
,
&
ts
);
clock_gettime
(
CLOCK_MONOTONIC
,
&
ts
);
...
@@ -375,17 +342,12 @@ std::shared_ptr<framework::LoDTensor> Executor<Dtype, P>::PredictLod(
...
@@ -375,17 +342,12 @@ std::shared_ptr<framework::LoDTensor> Executor<Dtype, P>::PredictLod(
framework
::
GetVarValue
<
framework
::
LoDTensor
>
(
out_keys
[
0
],
output_map
,
framework
::
GetVarValue
<
framework
::
LoDTensor
>
(
out_keys
[
0
],
output_map
,
*
(
program_
.
scope
));
*
(
program_
.
scope
));
#ifdef PADDLE_MOBILE_PROFILE
#ifdef PADDLE_MOBILE_PROFILE
// FILE *pf = fopen("profile.out", "w");
std
::
unordered_map
<
std
::
string
,
uint64_t
>
_tp
;
std
::
unordered_map
<
std
::
string
,
uint64_t
>
_tp
;
for
(
int
i
=
0
;
i
<
profile
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
profile
.
size
();
i
++
)
{
const
auto
&
pInfo
=
profile
[
i
];
const
auto
&
pInfo
=
profile
[
i
];
uint64_t
timeCost
=
pInfo
.
runEnd
-
pInfo
.
runBegin
;
uint64_t
timeCost
=
pInfo
.
runEnd
-
pInfo
.
runBegin
;
_tp
[
ops
[
i
]
->
Type
()]
+=
timeCost
;
_tp
[
ops
[
i
]
->
Type
()]
+=
timeCost
;
// fprintf(pf, "%d\t%s\t%d\t%llu\t%llu\t%llu\n", i,
// ops[i]->Type().c_str(),
// pInfo.tid, pInfo.runBegin, pInfo.runEnd, timeCost);
}
}
// fclose(pf);
printf
(
"====================[ profile ]======================
\n
"
);
printf
(
"====================[ profile ]======================
\n
"
);
using
prof_t
=
std
::
pair
<
std
::
string
,
uint64_t
>
;
using
prof_t
=
std
::
pair
<
std
::
string
,
uint64_t
>
;
std
::
vector
<
prof_t
>
_tv
(
_tp
.
begin
(),
_tp
.
end
());
std
::
vector
<
prof_t
>
_tv
(
_tp
.
begin
(),
_tp
.
end
());
...
...
src/io/executor.h
浏览文件 @
382e2ec9
...
@@ -14,15 +14,16 @@ limitations under the License. */
...
@@ -14,15 +14,16 @@ limitations under the License. */
#pragma once
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "common/types.h"
#include "common/types.h"
#include "common/util.h"
#include "framework/lod_tensor.h"
#include "framework/lod_tensor.h"
#include "framework/operator.h"
#include "framework/operator.h"
#include "framework/program/program.h"
#include "framework/program/program.h"
#include "framework/tensor.h"
#include "framework/tensor.h"
#include <memory>
#include <string>
#include <vector>
#include <map>
namespace
paddle_mobile
{
namespace
paddle_mobile
{
...
@@ -37,15 +38,18 @@ class Executor {
...
@@ -37,15 +38,18 @@ class Executor {
Executor
(
const
framework
::
Program
<
Dtype
>
program
,
Executor
(
const
framework
::
Program
<
Dtype
>
program
,
const
bool
use_optimize
=
true
,
const
bool
use_optimize
=
true
,
const
bool
loddable
=
false
);
const
bool
loddable
=
false
);
// predict with tensor
// @param input input tensor to do prediction
// predict with tensor input
// @param t input tensor to do prediction
// @return predicted tensor
// @return predicted tensor
std
::
shared_ptr
<
framework
::
Tensor
>
Predict
(
const
framework
::
Tensor
&
t
);
std
::
shared_ptr
<
framework
::
Tensor
>
Predict
(
const
framework
::
Tensor
&
t
);
// predict with lod tensor
// @param input input lod tensor to do prediction
// predict with lod tensor input
// @param t input lod tensor to do prediction
// @return predicted lod tensor
// @return predicted lod tensor
std
::
shared_ptr
<
framework
::
LoDTensor
>
PredictLod
(
std
::
shared_ptr
<
framework
::
LoDTensor
>
PredictLod
(
const
framework
::
LoDTensor
&
t
);
const
framework
::
LoDTensor
&
t
);
// predict with vector input and dims
// predict with vector input and dims
// @param input vector whose elements will be formed
// @param input vector whose elements will be formed
// @param input lod tensor to do prediction
// @param input lod tensor to do prediction
...
@@ -57,21 +61,22 @@ class Executor {
...
@@ -57,21 +61,22 @@ class Executor {
protected:
protected:
Executor
()
=
default
;
Executor
()
=
default
;
std
::
shared_ptr
<
framework
::
Tensor
>
Predict
(
const
framework
::
Tensor
&
t
,
int
block_id
);
bool
varInputMemory
(
const
std
::
shared_ptr
<
framework
::
VarDesc
>
&
var_desc
,
framework
::
Variable
*
var
,
framework
::
LoDTensor
*
tensor
)
const
;
void
InitMemory
();
void
InitMemory
();
void
LoadMemory
(
const
void
*
data
,
const
framework
::
VarDesc
var_desc
,
framework
::
LoDTensor
*
tensor
);
void
InitCombineMemory
();
void
InitCombineMemory
();
void
LoadMemory
(
void
**
data
,
const
std
::
shared_ptr
<
framework
::
VarDesc
>
var_desc
,
framework
::
LoDTensor
*
tensor
);
framework
::
Program
<
Dtype
>
program_
;
framework
::
Program
<
Dtype
>
program_
;
int
batch_size_
=
1
;
std
::
shared_ptr
<
framework
::
ProgramDesc
>
to_predict_program_
;
std
::
shared_ptr
<
framework
::
ProgramDesc
>
to_predict_program_
;
std
::
shared_ptr
<
framework
::
Tensor
>
Predict
(
const
framework
::
Tensor
&
t
,
int
block_id
);
std
::
map
<
framework
::
BlockDesc
,
std
::
map
<
framework
::
BlockDesc
,
std
::
vector
<
std
::
shared_ptr
<
framework
::
OperatorBase
<
Dtype
>>>>
std
::
vector
<
std
::
shared_ptr
<
framework
::
OperatorBase
<
Dtype
>>>>
ops_of_block_
;
ops_of_block_
;
bool
use_optimize_
=
false
;
bool
loddable_
=
false
;
#ifdef PADDLE_MOBILE_PROFILE
#ifdef PADDLE_MOBILE_PROFILE
struct
ProfInfo
{
struct
ProfInfo
{
int
tid
=
0
;
int
tid
=
0
;
...
@@ -79,10 +84,9 @@ class Executor {
...
@@ -79,10 +84,9 @@ class Executor {
uint64_t
runEnd
=
0UL
;
uint64_t
runEnd
=
0UL
;
};
};
#endif
#endif
int
batch_size_
=
1
;
bool
varInputMemory
(
const
std
::
shared_ptr
<
framework
::
VarDesc
>
&
var_desc
,
bool
use_optimize_
=
false
;
framework
::
Variable
*
var
,
bool
loddable_
=
false
;
framework
::
LoDTensor
*
tensor
)
const
;
};
};
}
// namespace paddle_mobile
}
// namespace paddle_mobile
src/io/loader.h
浏览文件 @
382e2ec9
...
@@ -24,19 +24,11 @@ namespace paddle_mobile {
...
@@ -24,19 +24,11 @@ namespace paddle_mobile {
template
<
typename
Dtype
=
CPU
,
Precision
P
=
Precision
::
FP32
>
template
<
typename
Dtype
=
CPU
,
Precision
P
=
Precision
::
FP32
>
class
Loader
{
class
Loader
{
public:
public:
/*
* @b load separate format fluid model
* @b 加载分开形式的 fluid 模型
* */
const
framework
::
Program
<
Dtype
,
P
>
Load
(
const
std
::
string
&
dirname
,
const
framework
::
Program
<
Dtype
,
P
>
Load
(
const
std
::
string
&
dirname
,
bool
optimize
=
false
,
bool
optimize
=
false
,
bool
quantification
=
false
,
bool
quantification
=
false
,
bool
can_add_split
=
false
);
bool
can_add_split
=
false
);
/*
* @b load combine format fluid mode
* @b 加载结合在一起格式的模型
* */
const
framework
::
Program
<
Dtype
,
P
>
Load
(
const
std
::
string
&
model_path
,
const
framework
::
Program
<
Dtype
,
P
>
Load
(
const
std
::
string
&
model_path
,
const
std
::
string
&
para_path
,
const
std
::
string
&
para_path
,
bool
optimize
=
false
,
bool
optimize
=
false
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录