Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
7915815b
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7915815b
编写于
7月 20, 2018
作者:
W
wangliu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add quantification tool to compress binary size
上级
8b833ee8
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
250 addition
and
17 deletion
+250
-17
CMakeLists.txt
CMakeLists.txt
+5
-0
src/framework/program/program.h
src/framework/program/program.h
+1
-0
src/io/executor.cpp
src/io/executor.cpp
+20
-5
src/io/loader.cpp
src/io/loader.cpp
+6
-4
src/io/loader.h
src/io/loader.h
+4
-1
src/io/paddle_mobile.cpp
src/io/paddle_mobile.cpp
+4
-4
src/io/paddle_mobile.h
src/io/paddle_mobile.h
+2
-2
test/net/test_googlenet.cpp
test/net/test_googlenet.cpp
+1
-1
tools/quantification/CMakeLists.txt
tools/quantification/CMakeLists.txt
+5
-0
tools/quantification/convert.cpp
tools/quantification/convert.cpp
+202
-0
未找到文件。
CMakeLists.txt
浏览文件 @
7915815b
...
...
@@ -9,6 +9,7 @@ option(LOG_PROFILE "log profile" ON)
option
(
CPU
"armv7 with neon"
ON
)
option
(
MALI_GPU
"mali gpu"
OFF
)
option
(
FPGA
"fpga"
OFF
)
option
(
QUANTI
"quantification"
OFF
)
file
(
GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm
)
file
(
GLOB_RECURSE PADDLE_MOBILE_H src/*.h
)
...
...
@@ -153,3 +154,7 @@ if(DEBUGING)
endif
()
endif
()
if
(
QUANTI
)
add_subdirectory
(
tools/quantification
)
endif
()
src/framework/program/program.h
浏览文件 @
7915815b
...
...
@@ -30,6 +30,7 @@ class Program {
std
::
string
model_path
;
std
::
string
para_path
;
bool
combined
=
false
;
bool
quantification
=
false
;
private:
};
...
...
src/io/executor.cpp
浏览文件 @
7915815b
...
...
@@ -154,7 +154,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
tensor
->
Resize
(
framework
::
make_ddim
(
desc
.
Dims
()));
void
*
memory
=
tenso
r
;
void
*
memory
=
nullpt
r
;
int
type_size
=
0
;
switch
(
desc
.
DataType
())
{
case
framework
::
VARTYPE_TYPE_FP16
:
...
...
@@ -179,11 +179,26 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
default:
break
;
}
for
(
int
n
=
0
;
n
<
memory_size
*
type_size
;
++
n
)
{
static_cast
<
char
*>
(
memory
)[
n
]
=
(
*
data
)[
n
];
if
(
program_
.
quantification
)
{
float
min_value
;
float
max_value
;
memcpy
(
&
min_value
,
*
data
,
sizeof
(
float
));
memcpy
(
&
max_value
,
*
data
+
sizeof
(
float
)
,
sizeof
(
float
));
*
data
+=
2
*
sizeof
(
float
);
const
float
factor
=
(
max_value
-
min_value
)
/
255.0
;
uint8_t
*
uint8_data
=
(
uint8_t
*
)
(
*
data
);
for
(
int
k
=
0
;
k
<
memory_size
;
++
k
)
{
static_cast
<
float
*>
(
memory
)[
k
]
=
uint8_data
[
k
]
*
factor
+
min_value
;
}
*
data
+=
(
memory_size
*
sizeof
(
uint8_t
));
}
else
{
for
(
int
n
=
0
;
n
<
memory_size
*
type_size
;
++
n
)
{
static_cast
<
char
*>
(
memory
)[
n
]
=
(
*
data
)[
n
];
}
(
*
data
)
+=
(
sizeof
(
char
)
*
memory_size
*
type_size
);
}
(
*
data
)
+=
(
sizeof
(
char
)
*
memory_size
*
type_size
);
}
template
<
typename
Dtype
,
Precision
P
>
...
...
src/io/loader.cpp
浏览文件 @
7915815b
...
...
@@ -44,9 +44,9 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
template
<
typename
Dtype
,
Precision
P
>
const
framework
::
Program
<
Dtype
,
P
>
Loader
<
Dtype
,
P
>::
Load
(
const
std
::
string
&
dirname
,
bool
optimize
,
bool
can_add_split
)
{
const
std
::
string
&
dirname
,
bool
optimize
,
bool
quantification
,
bool
can_add_split
)
{
auto
program
=
this
->
LoadProgram
(
dirname
+
"/__model__"
,
optimize
,
can_add_split
);
this
->
LoadProgram
(
dirname
+
"/__model__"
,
optimize
,
quantification
,
can_add_split
);
program
.
model_path
=
dirname
;
return
program
;
}
...
...
@@ -54,16 +54,17 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
template
<
typename
Dtype
,
Precision
P
>
const
framework
::
Program
<
Dtype
,
P
>
Loader
<
Dtype
,
P
>::
Load
(
const
std
::
string
&
model_path
,
const
std
::
string
&
para_path
,
bool
optimize
)
{
bool
optimize
,
bool
quantification
)
{
auto
program
=
this
->
LoadProgram
(
model_path
,
optimize
);
program
.
para_path
=
para_path
;
program
.
combined
=
true
;
program
.
quantification
=
quantification
;
return
program
;
}
template
<
typename
Dtype
,
Precision
P
>
const
framework
::
Program
<
Dtype
,
P
>
Loader
<
Dtype
,
P
>::
LoadProgram
(
const
std
::
string
&
model_path
,
bool
optimize
,
bool
can_add_split
)
{
const
std
::
string
&
model_path
,
bool
optimize
,
bool
quantification
,
bool
can_add_split
)
{
std
::
string
model_filename
=
model_path
;
PaddleMobile__Framework__Proto__ProgramDesc
*
c_program
;
uint8_t
*
buf
=
NULL
;
...
...
@@ -82,6 +83,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
framework
::
Program
<
Dtype
,
P
>
program
;
program
.
originProgram
=
originProgramDesc
;
program
.
quantification
=
quantification
;
auto
scope
=
std
::
make_shared
<
framework
::
Scope
>
();
program
.
scope
=
scope
;
...
...
src/io/loader.h
浏览文件 @
7915815b
...
...
@@ -30,6 +30,7 @@ class Loader {
* */
const
framework
::
Program
<
Dtype
,
P
>
Load
(
const
std
::
string
&
dirname
,
bool
optimize
=
false
,
bool
quantification
=
false
,
bool
can_add_split
=
false
);
/*
...
...
@@ -38,11 +39,13 @@ class Loader {
* */
const
framework
::
Program
<
Dtype
,
P
>
Load
(
const
std
::
string
&
model_path
,
const
std
::
string
&
para_path
,
bool
optimize
=
false
);
bool
optimize
=
false
,
bool
quantification
=
false
);
private:
const
framework
::
Program
<
Dtype
,
P
>
LoadProgram
(
const
std
::
string
&
model_path
,
bool
optimize
=
false
,
bool
quantification
=
false
,
bool
can_add_split
=
false
);
};
...
...
src/io/paddle_mobile.cpp
浏览文件 @
7915815b
...
...
@@ -25,7 +25,7 @@ void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
};
template
<
typename
Dtype
,
Precision
P
>
bool
PaddleMobile
<
Dtype
,
P
>::
Load
(
const
std
::
string
&
dirname
,
bool
optimize
,
bool
PaddleMobile
<
Dtype
,
P
>::
Load
(
const
std
::
string
&
dirname
,
bool
optimize
,
bool
quantification
,
int
batch_size
)
{
if
(
loader_
.
get
()
==
nullptr
)
{
loader_
=
std
::
make_shared
<
Loader
<
Dtype
,
P
>>
();
...
...
@@ -35,7 +35,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
if
(
executor_
.
get
()
==
nullptr
)
{
executor_
=
std
::
make_shared
<
Executor
<
Dtype
,
P
>>
(
loader_
->
Load
(
dirname
,
optimize
),
batch_size
,
optimize
);
loader_
->
Load
(
dirname
,
optimize
,
quantification
),
batch_size
,
optimize
);
}
else
{
LOG
(
kLOG_INFO
)
<<
"executor inited"
;
}
...
...
@@ -45,7 +45,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
template
<
typename
Dtype
,
Precision
P
>
bool
PaddleMobile
<
Dtype
,
P
>::
Load
(
const
std
::
string
&
model_path
,
const
std
::
string
&
para_path
,
bool
optimize
,
const
std
::
string
&
para_path
,
bool
optimize
,
bool
quantification
,
int
batch_size
)
{
if
(
loader_
.
get
()
==
nullptr
)
{
loader_
=
std
::
make_shared
<
Loader
<
Dtype
,
P
>>
();
...
...
@@ -55,7 +55,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
if
(
executor_
.
get
()
==
nullptr
)
{
executor_
=
std
::
make_shared
<
Executor
<
Dtype
,
P
>>
(
loader_
->
Load
(
model_path
,
para_path
,
optimize
),
batch_size
,
optimize
);
loader_
->
Load
(
model_path
,
para_path
,
optimize
,
quantification
),
batch_size
,
optimize
);
}
else
{
LOG
(
kLOG_INFO
)
<<
"executor inited"
;
}
...
...
src/io/paddle_mobile.h
浏览文件 @
7915815b
...
...
@@ -38,7 +38,7 @@ class PaddleMobile {
* @b load separate format fluid model
* @b 加载分开形式的 fluid 模型
* */
bool
Load
(
const
std
::
string
&
dirname
,
bool
optimize
=
false
,
bool
Load
(
const
std
::
string
&
dirname
,
bool
optimize
=
false
,
bool
quantification
=
false
,
int
batch_size
=
1
);
/*
...
...
@@ -46,7 +46,7 @@ class PaddleMobile {
* @b 加载结合在一起格式的模型
* */
bool
Load
(
const
std
::
string
&
model_path
,
const
std
::
string
&
para_path
,
bool
optimize
=
false
,
int
batch_size
=
1
);
bool
optimize
=
false
,
bool
quantification
=
false
,
int
batch_size
=
1
);
void
SetThreadNum
(
int
num
);
/*
...
...
test/net/test_googlenet.cpp
浏览文件 @
7915815b
...
...
@@ -21,7 +21,7 @@ int main() {
paddle_mobile
.
SetThreadNum
(
4
);
bool
optimize
=
true
;
auto
time1
=
time
();
if
(
paddle_mobile
.
Load
(
g_googlenet
,
optimize
))
{
if
(
paddle_mobile
.
Load
(
g_googlenet
,
optimize
,
true
))
{
auto
time2
=
time
();
DLOG
<<
"load cost: "
<<
time_diff
(
time1
,
time1
)
<<
"ms"
;
std
::
vector
<
float
>
input
;
...
...
tools/quantification/CMakeLists.txt
0 → 100644
浏览文件 @
7915815b
# Builds the `convert` quantification tool from convert.cpp and links it
# against the paddle-mobile library target.

# Executables produced from this directory are written to a `build`
# folder that lives next to this CMakeLists.txt.
set(dir ${CMAKE_CURRENT_SOURCE_DIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")

add_executable(convert convert.cpp)
target_link_libraries(convert paddle-mobile)
\ No newline at end of file
tools/quantification/convert.cpp
0 → 100644
浏览文件 @
7915815b
#include "io/paddle_mobile.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <vector>
// Names pulled into the global namespace for this tool.
using std::string;
using paddle_mobile::Executor;
using paddle_mobile::framework::Program;

// Model locations used by main(), given relative to the working directory.
// main() appends "/model" and "/params" to the combined location and passes
// the other one as a model directory.
static const std::string g_googlenet_combine("../models/googlenet_combine");
static const std::string g_googlenet("../models/googlenet");
/**
 * Reads an entire file into a freshly allocated byte buffer.
 *
 * @param filename path of the file to read; it is opened in binary mode.
 * @return pointer to a buffer allocated with `new char[]` that holds the
 *         file contents. Ownership passes to the caller, who must release
 *         it with `delete[]`.
 *
 * An unopenable file, a non-positive size and a short read are each reported
 * through PADDLE_MOBILE_ENFORCE.
 */
char *Get_binary_data(std::string filename) {
  // RAII handle: the file is closed on every exit path, including the case
  // where one of the enforce checks below leaves the function early.
  std::unique_ptr<FILE, int (*)(FILE *)> file(fopen(filename.c_str(), "rb"),
                                              &fclose);
  PADDLE_MOBILE_ENFORCE(file.get() != nullptr, "can't open file: %s ",
                        filename.c_str());

  // Measure the file by seeking to its end. A failed seek is folded into the
  // same "size" check as a failed ftell (which returns -1).
  const bool seek_ok = fseek(file.get(), 0, SEEK_END) == 0;
  const int64_t size = seek_ok ? static_cast<int64_t>(ftell(file.get())) : -1;
  PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
  rewind(file.get());

  // Hold the buffer in a smart pointer until the read has been validated so
  // it is not leaked if the check below leaves the function early.
  const size_t byte_count = static_cast<size_t>(size);
  std::unique_ptr<char[]> data(new char[byte_count]);
  const size_t bytes_read = fread(data.get(), 1, byte_count, file.get());
  PADDLE_MOBILE_ENFORCE(bytes_read == byte_count,
                        "read binary file bytes do not match with fseek");
  DLOG << "Get_binary_data end";

  // Hand ownership of the buffer to the caller.
  return data.release();
}
// Copies one value of type T out of the byte stream at *cursor and advances
// *cursor past it. memcpy is used instead of dereferencing a cast pointer
// because nothing here guarantees the stream position is aligned for T.
template <typename T>
static T ReadAndAdvance(char **cursor) {
  T value;
  memcpy(&value, *cursor, sizeof(T));
  (*cursor) += sizeof(T);
  return value;
}

/**
 * Parses one serialized variable from the byte stream at `*data`, loads it
 * into `tensor`, and writes a quantized copy of it to `out_file`.
 *
 * Header fields (version, LoD information, tensor version, tensor desc) are
 * copied to the output unchanged. The FP32 payload is replaced by:
 *   float min, float max, then one uint8 code per element where
 *   code = round((value - min) / (max - min) * 255).
 *
 * @param var_desc  description of the variable; supplies dims and data type.
 * @param tensor    destination tensor; its LoD is filled, it is resized to
 *                  the dims in `var_desc`, and for FP32 its data is loaded.
 * @param data      in/out cursor into the source bytes; advanced past
 *                  everything that belongs to this variable.
 * @param out_file  open, writable stream that receives the quantized record.
 *
 * Only FP32 tensors are quantized. For any other data type the payload is
 * skipped in the input and nothing is written for it (a message is printed
 * to stderr), because no float buffer exists to quantize from.
 */
void LoadWithDump(const paddle_mobile::framework::VarDesc var_desc,
                  paddle_mobile::framework::LoDTensor *tensor, char **data,
                  FILE *out_file) {
  // 1. version
  const uint32_t version = ReadAndAdvance<uint32_t>(data);
  fwrite(&version, sizeof(uint32_t), 1, out_file);

  // 2. LoD information. The level count read from the input is what gets
  // written and iterated; otherwise any LoD entries in the input would be
  // left unconsumed and misread as the fields that follow.
  const uint64_t lod_level = ReadAndAdvance<uint64_t>(data);
  fwrite(&lod_level, sizeof(uint64_t), 1, out_file);

  auto &lod = *tensor->mutable_lod();
  lod.resize(lod_level);
  for (uint64_t i = 0; i < lod_level; ++i) {
    // Size of this level in bytes, followed by that many bytes of size_t.
    const uint64_t level_bytes = ReadAndAdvance<uint64_t>(data);
    fwrite(&level_bytes, sizeof(uint64_t), 1, out_file);

    std::vector<size_t> offsets(level_bytes / sizeof(size_t));
    for (size_t k = 0; k < offsets.size(); ++k) {
      offsets[k] = ReadAndAdvance<size_t>(data);
    }
    // Write the elements, not the std::vector object itself.
    fwrite(offsets.data(), sizeof(size_t), offsets.size(), out_file);
    lod[i] = offsets;
  }

  // 3. tensor version
  const uint32_t tensor_version = ReadAndAdvance<uint32_t>(data);
  fwrite(&tensor_version, sizeof(uint32_t), 1, out_file);

  // 4. tensor desc: a length followed by that many opaque bytes, which are
  // forwarded straight from the input to the output.
  const int32_t size = ReadAndAdvance<int32_t>(data);
  fwrite(&size, sizeof(int32_t), 1, out_file);
  const size_t desc_bytes = size > 0 ? static_cast<size_t>(size) : 0;
  fwrite(*data, sizeof(char), desc_bytes, out_file);
  (*data) += desc_bytes;

  // 5. tensor payload
  const paddle_mobile::framework::TensorDesc &desc = var_desc.Tensor_desc();
  int memory_size = 1;
  for (auto l : desc.Dims()) {
    memory_size *= l;
  }
  tensor->Resize(paddle_mobile::framework::make_ddim(desc.Dims()));

  // `memory` stays null unless a real element buffer is obtained below, so
  // raw bytes are never copied over the LoDTensor object itself.
  void *memory = nullptr;
  int type_size = 0;
  switch (desc.DataType()) {
    case paddle_mobile::framework::VARTYPE_TYPE_FP16:
      type_size = 2;
      break;
    case paddle_mobile::framework::VARTYPE_TYPE_FP32:
      type_size = 4;
      memory = tensor->mutable_data<float>();
      break;
    case paddle_mobile::framework::VARTYPE_TYPE_FP64:
      type_size = 8;
      break;
    case paddle_mobile::framework::VARTYPE_TYPE_INT32:
      type_size = 4;
      break;
    case paddle_mobile::framework::VARTYPE_TYPE_INT64:
      type_size = 8;
      break;
    case paddle_mobile::framework::VARTYPE_TYPE_BOOL:
      type_size = 1;
      break;
    default:
      break;
  }

  const size_t element_count =
      memory_size > 0 ? static_cast<size_t>(memory_size) : 0;
  const size_t payload_bytes = element_count * static_cast<size_t>(type_size);

  if (memory == nullptr) {
    // Not FP32: there is no float buffer to quantize. Keep the input cursor
    // consistent for the caller and emit nothing for the payload.
    fprintf(stderr,
            "LoadWithDump: only FP32 tensors can be quantified; payload "
            "skipped\n");
    (*data) += payload_bytes;
    return;
  }

  memcpy(memory, *data, payload_bytes);
  (*data) += payload_bytes;

  // Value range of the tensor. The maximum is seeded with lowest() (the most
  // negative float); min() is the smallest positive float and would be wrong
  // for tensors whose values are all negative.
  const float *values = static_cast<const float *>(memory);
  float min_value = std::numeric_limits<float>::max();
  float max_value = std::numeric_limits<float>::lowest();
  for (size_t k = 0; k < element_count; ++k) {
    min_value = std::min(min_value, values[k]);
    max_value = std::max(max_value, values[k]);
  }
  fwrite(&min_value, sizeof(float), 1, out_file);
  fwrite(&max_value, sizeof(float), 1, out_file);

  // Map every value onto [0, 255]. When all values are equal the range is
  // zero; code 0 is used so no 0/0 is evaluated (code 0 corresponds to
  // min_value in the formula above).
  const float range = max_value - min_value;
  std::vector<uint8_t> codes(element_count);
  for (size_t g = 0; g < element_count; ++g) {
    codes[g] = range > 0
                   ? static_cast<uint8_t>(
                         round((values[g] - min_value) / range * 255))
                   : static_cast<uint8_t>(0);
  }
  // One bulk write instead of one fwrite call per byte.
  fwrite(codes.data(), sizeof(uint8_t), codes.size(), out_file);
}
/**
 * Quantizes a model stored as one model file plus one combined parameter
 * file, writing all quantized parameters into a single output file.
 *
 * @param model_path      path of the model description file.
 * @param param_path      path of the combined parameter file.
 * @param param_min_path  path of the output file to create (overwritten).
 *
 * Every persistable variable except "feed" and "fetch" is passed, in block
 * and variable order, to LoadWithDump, which consumes its bytes from the
 * parameter blob and appends the quantized record to the output file.
 */
void quantificate_combined(std::string model_path, std::string param_path,
                           std::string param_min_path) {
  paddle_mobile::Loader<paddle_mobile::CPU, paddle_mobile::Precision::FP32>
      loader;
  bool optimize = true;
  auto program = loader.Load(model_path, param_path, optimize);

  // Get_binary_data allocates with new[]; unique_ptr<char[]> releases it
  // with the matching delete[] on every exit path.
  std::unique_ptr<char[]> origin_data(Get_binary_data(program.para_path));
  // Read cursor that LoadWithDump advances through the blob.
  char *data = origin_data.get();

  std::unique_ptr<FILE, int (*)(FILE *)> out_file(
      fopen(param_min_path.c_str(), "wb"), &fclose);
  if (out_file.get() == nullptr) {
    // Without this check a null FILE* would be handed to fwrite.
    fprintf(stderr, "can't open output file: %s\n", param_min_path.c_str());
    return;
  }

  for (const auto &block : program.originProgram->Blocks()) {
    for (const auto &var_desc : block->Vars()) {
      auto var = program.scope->Var(var_desc->Name());
      if (var_desc->Persistable()) {
        auto tensor =
            var->template GetMutable<paddle_mobile::framework::LoDTensor>();
        if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
          continue;
        }
        LoadWithDump(*var_desc, tensor, &data, out_file.get());
      }
    }
  }
}
/**
 * Quantizes a model stored as a directory with one parameter file per
 * variable, writing one quantized file per variable into an output directory.
 *
 * @param model_dir       directory holding the model and its parameter files.
 * @param param_min_path  output directory; created with the shell command
 *                        `mkdir <param_min_path>`.
 *
 * For every persistable variable except "feed" and "fetch", the file
 * `<model_path>/<name>` is read and `<param_min_path>/<name>` is written via
 * LoadWithDump. A variable whose output file cannot be opened is reported on
 * stderr and skipped.
 */
void quantificate_seperated(std::string model_dir,
                            std::string param_min_path) {
  paddle_mobile::Loader<paddle_mobile::CPU, paddle_mobile::Precision::FP32>
      loader;
  bool optimize = true;
  auto program = loader.Load(model_dir, optimize);

  // NOTE(review): the path is spliced into a shell command unquoted; paths
  // containing spaces or shell metacharacters will not behave as intended.
  std::string shell_command = "mkdir " + param_min_path;
  system(shell_command.c_str());

  for (const auto &block : program.originProgram->Blocks()) {
    for (const auto &var_desc : block->Vars()) {
      auto var = program.scope->Var(var_desc->Name());
      if (var_desc->Persistable()) {
        auto tensor =
            var->template GetMutable<paddle_mobile::framework::LoDTensor>();
        if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
          continue;
        }

        std::string file_name = param_min_path + "/" + var_desc->Name();
        // RAII handle: closed even if reading the input below fails.
        std::unique_ptr<FILE, int (*)(FILE *)> out_file(
            fopen(file_name.c_str(), "wb"), &fclose);
        if (out_file.get() == nullptr) {
          // Without this check a null FILE* would be handed to fwrite.
          fprintf(stderr, "can't open output file: %s\n", file_name.c_str());
          continue;
        }

        // Get_binary_data allocates with new[]; unique_ptr<char[]> releases
        // it with the matching delete[].
        std::unique_ptr<char[]> origin_data(
            Get_binary_data(program.model_path + "/" + var_desc->Name()));
        // Read cursor that LoadWithDump advances through the buffer.
        char *data = origin_data.get();
        LoadWithDump(*var_desc, tensor, &data, out_file.get());
      }
    }
  }
}
// Tool entry point: quantizes the per-variable ("separated") googlenet model
// into the directory "param_min_dir". The combined-format call is kept below
// but disabled.
int main() {
  // Arguments for the combined-format path (currently disabled).
  const std::string combined_output{"params_min"};
  const std::string combined_model = g_googlenet_combine + "/model";
  const std::string combined_params = g_googlenet_combine + "/params";
  // quantificate_combined(combined_model, combined_params, combined_output);

  // Arguments for the separated-format path.
  const std::string separated_output{"param_min_dir"};
  const std::string separated_model_dir = g_googlenet;
  quantificate_seperated(separated_model_dir, separated_output);

  return 0;
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录