Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Serving
提交
24c78e3b
S
Serving
项目概览
PaddlePaddle
/
Serving
大约 1 年 前同步成功
通知
186
Star
833
Fork
253
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
105
列表
看板
标记
里程碑
合并请求
10
Wiki
2
Wiki
分析
仓库
DevOps
项目成员
Pages
S
Serving
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
105
Issue
105
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
2
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
24c78e3b
编写于
3月 24, 2020
作者:
W
wangjiawei04
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cube with quant infer
上级
898e062d
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
615 addition
and
49 deletion
+615
-49
core/configure/proto/server_configure.proto
core/configure/proto/server_configure.proto
+1
-0
core/general-server/op/CMakeLists.txt
core/general-server/op/CMakeLists.txt
+1
-1
core/general-server/op/general_dist_kv_quant_infer_op.cpp
core/general-server/op/general_dist_kv_quant_infer_op.cpp
+190
-0
core/general-server/op/general_dist_kv_quant_infer_op.h
core/general-server/op/general_dist_kv_quant_infer_op.h
+46
-0
core/predictor/framework/resource.cpp
core/predictor/framework/resource.cpp
+13
-32
core/predictor/framework/resource.h
core/predictor/framework/resource.h
+2
-1
core/predictor/src/pdserving.cpp
core/predictor/src/pdserving.cpp
+0
-8
core/predictor/tools/CMakeLists.txt
core/predictor/tools/CMakeLists.txt
+1
-1
core/predictor/tools/quant.cpp
core/predictor/tools/quant.cpp
+117
-0
core/predictor/tools/quant.h
core/predictor/tools/quant.h
+45
-0
core/predictor/tools/seq_generator.cpp
core/predictor/tools/seq_generator.cpp
+121
-3
python/examples/criteo_ctr_with_cube/clean.sh
python/examples/criteo_ctr_with_cube/clean.sh
+0
-1
python/examples/criteo_ctr_with_cube/cube_prepare.sh
python/examples/criteo_ctr_with_cube/cube_prepare.sh
+16
-1
python/examples/criteo_ctr_with_cube/cube_quant_prepare.sh
python/examples/criteo_ctr_with_cube/cube_quant_prepare.sh
+22
-0
python/examples/criteo_ctr_with_cube/test_server_quant.py
python/examples/criteo_ctr_with_cube/test_server_quant.py
+37
-0
python/paddle_serving_server/__init__.py
python/paddle_serving_server/__init__.py
+3
-1
未找到文件。
core/configure/proto/server_configure.proto
浏览文件 @
24c78e3b
...
@@ -56,6 +56,7 @@ message ResourceConf {
...
@@ -56,6 +56,7 @@ message ResourceConf {
optional
string
general_model_file
=
4
;
optional
string
general_model_file
=
4
;
optional
string
cube_config_path
=
5
;
optional
string
cube_config_path
=
5
;
optional
string
cube_config_file
=
6
;
optional
string
cube_config_file
=
6
;
optional
int32
cube_quant_bits
=
7
;
// set 0 if no quant.
};
};
// DAG node dependency info
// DAG node dependency info
...
...
core/general-server/op/CMakeLists.txt
浏览文件 @
24c78e3b
FILE
(
GLOB op_srcs
${
CMAKE_CURRENT_LIST_DIR
}
/*.cpp
)
FILE
(
GLOB op_srcs
${
CMAKE_CURRENT_LIST_DIR
}
/*.cpp
${
CMAKE_CURRENT_LIST_DIR
}
/../../predictor/tools/quant.cpp
)
LIST
(
APPEND serving_srcs
${
op_srcs
}
)
LIST
(
APPEND serving_srcs
${
op_srcs
}
)
core/general-server/op/general_dist_kv_quant_infer_op.cpp
0 → 100755
浏览文件 @
24c78e3b
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_dist_kv_quant_infer_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include <unordered_map>
#include <utility>
#include "core/cube/cube-api/include/cube_api.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "core/util/include/timer.h"
#include "core/predictor/tools/quant.h"
namespace
baidu
{
namespace
paddle_serving
{
namespace
serving
{
using
baidu
::
paddle_serving
::
Timer
;
using
baidu
::
paddle_serving
::
predictor
::
MempoolWrapper
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Tensor
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Response
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Request
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
FetchInst
;
using
baidu
::
paddle_serving
::
predictor
::
InferManager
;
using
baidu
::
paddle_serving
::
predictor
::
PaddleGeneralModelConfig
;
int
GeneralDistKVQuantInferOp
::
inference
()
{
VLOG
(
2
)
<<
"Going to run inference"
;
const
GeneralBlob
*
input_blob
=
get_depend_argument
<
GeneralBlob
>
(
pre_name
());
VLOG
(
2
)
<<
"Get precedent op name: "
<<
pre_name
();
GeneralBlob
*
output_blob
=
mutable_data
<
GeneralBlob
>
();
if
(
!
input_blob
)
{
LOG
(
ERROR
)
<<
"Failed mutable depended argument, op:"
<<
pre_name
();
return
-
1
;
}
const
TensorVector
*
in
=
&
input_blob
->
tensor_vector
;
TensorVector
*
out
=
&
output_blob
->
tensor_vector
;
int
batch_size
=
input_blob
->
GetBatchSize
();
VLOG
(
2
)
<<
"input batch size: "
<<
batch_size
;
std
::
vector
<
uint64_t
>
keys
;
std
::
vector
<
rec
::
mcube
::
CubeValue
>
values
;
int
sparse_count
=
0
;
int
dense_count
=
0
;
std
::
vector
<
std
::
pair
<
int64_t
*
,
size_t
>>
dataptr_size_pairs
;
size_t
key_len
=
0
;
for
(
size_t
i
=
0
;
i
<
in
->
size
();
++
i
)
{
if
(
in
->
at
(
i
).
dtype
!=
paddle
::
PaddleDType
::
INT64
)
{
++
dense_count
;
continue
;
}
++
sparse_count
;
size_t
elem_num
=
1
;
for
(
size_t
s
=
0
;
s
<
in
->
at
(
i
).
shape
.
size
();
++
s
)
{
elem_num
*=
in
->
at
(
i
).
shape
[
s
];
}
key_len
+=
elem_num
;
int64_t
*
data_ptr
=
static_cast
<
int64_t
*>
(
in
->
at
(
i
).
data
.
data
());
dataptr_size_pairs
.
push_back
(
std
::
make_pair
(
data_ptr
,
elem_num
));
}
keys
.
resize
(
key_len
);
int
key_idx
=
0
;
for
(
size_t
i
=
0
;
i
<
dataptr_size_pairs
.
size
();
++
i
)
{
std
::
copy
(
dataptr_size_pairs
[
i
].
first
,
dataptr_size_pairs
[
i
].
first
+
dataptr_size_pairs
[
i
].
second
,
keys
.
begin
()
+
key_idx
);
key_idx
+=
dataptr_size_pairs
[
i
].
second
;
}
rec
::
mcube
::
CubeAPI
*
cube
=
rec
::
mcube
::
CubeAPI
::
instance
();
std
::
vector
<
std
::
string
>
table_names
=
cube
->
get_table_names
();
if
(
table_names
.
size
()
==
0
)
{
LOG
(
ERROR
)
<<
"cube init error or cube config not given."
;
return
-
1
;
}
int
ret
=
cube
->
seek
(
table_names
[
0
],
keys
,
&
values
);
if
(
values
.
size
()
!=
keys
.
size
()
||
values
[
0
].
buff
.
size
()
==
0
)
{
LOG
(
ERROR
)
<<
"cube value return null"
;
}
TensorVector
sparse_out
;
sparse_out
.
resize
(
sparse_count
);
TensorVector
dense_out
;
dense_out
.
resize
(
dense_count
);
int
cube_val_idx
=
0
;
int
sparse_idx
=
0
;
int
dense_idx
=
0
;
std
::
unordered_map
<
int
,
int
>
in_out_map
;
baidu
::
paddle_serving
::
predictor
::
Resource
&
resource
=
baidu
::
paddle_serving
::
predictor
::
Resource
::
instance
();
std
::
shared_ptr
<
PaddleGeneralModelConfig
>
model_config
=
resource
.
get_general_model_config
();
int
cube_quant_bits
=
resource
.
get_cube_quant_bits
();
size_t
EMBEDDING_SIZE
=
0
;
if
(
cube_quant_bits
==
0
)
{
EMBEDDING_SIZE
=
values
[
0
].
buff
.
size
()
/
sizeof
(
float
);
}
else
{
EMBEDDING_SIZE
=
values
[
0
].
buff
.
size
()
-
2
*
sizeof
(
float
);
}
for
(
size_t
i
=
0
;
i
<
in
->
size
();
++
i
)
{
if
(
in
->
at
(
i
).
dtype
!=
paddle
::
PaddleDType
::
INT64
)
{
dense_out
[
dense_idx
]
=
in
->
at
(
i
);
++
dense_idx
;
continue
;
}
sparse_out
[
sparse_idx
].
lod
.
resize
(
in
->
at
(
i
).
lod
.
size
());
for
(
size_t
x
=
0
;
x
<
sparse_out
[
sparse_idx
].
lod
.
size
();
++
x
)
{
sparse_out
[
sparse_idx
].
lod
[
x
].
resize
(
in
->
at
(
i
).
lod
[
x
].
size
());
std
::
copy
(
in
->
at
(
i
).
lod
[
x
].
begin
(),
in
->
at
(
i
).
lod
[
x
].
end
(),
sparse_out
[
sparse_idx
].
lod
[
x
].
begin
());
}
sparse_out
[
sparse_idx
].
dtype
=
paddle
::
PaddleDType
::
FLOAT32
;
sparse_out
[
sparse_idx
].
shape
.
push_back
(
sparse_out
[
sparse_idx
].
lod
[
0
].
back
());
sparse_out
[
sparse_idx
].
shape
.
push_back
(
EMBEDDING_SIZE
);
sparse_out
[
sparse_idx
].
name
=
model_config
->
_feed_name
[
i
];
sparse_out
[
sparse_idx
].
data
.
Resize
(
sparse_out
[
sparse_idx
].
lod
[
0
].
back
()
*
EMBEDDING_SIZE
*
sizeof
(
float
));
// END HERE
float
*
dst_ptr
=
static_cast
<
float
*>
(
sparse_out
[
sparse_idx
].
data
.
data
());
for
(
int
x
=
0
;
x
<
sparse_out
[
sparse_idx
].
lod
[
0
].
back
();
++
x
)
{
float
*
data_ptr
=
dst_ptr
+
x
*
EMBEDDING_SIZE
;
if
(
cube_quant_bits
==
0
)
{
memcpy
(
data_ptr
,
values
[
cube_val_idx
].
buff
.
data
(),
values
[
cube_val_idx
].
buff
.
size
());
}
else
{
// min (float), max (float), num, num, num... (Byte)
size_t
num_of_float
=
values
[
cube_val_idx
].
buff
.
size
()
-
2
*
sizeof
(
float
);
float
*
float_ptr
=
new
float
[
num_of_float
];
char
*
src_ptr
=
new
char
[
values
[
cube_val_idx
].
buff
.
size
()];
memcpy
(
src_ptr
,
values
[
cube_val_idx
].
buff
.
data
(),
values
[
cube_val_idx
].
buff
.
size
());
float
*
minmax
=
reinterpret_cast
<
float
*>
(
src_ptr
);
dequant
(
src_ptr
+
2
*
sizeof
(
float
),
float_ptr
,
minmax
[
0
],
minmax
[
1
],
num_of_float
,
cube_quant_bits
);
memcpy
(
data_ptr
,
float_ptr
,
sizeof
(
float
)
*
num_of_float
);
delete
float_ptr
;
delete
src_ptr
;
}
cube_val_idx
++
;
}
++
sparse_idx
;
}
TensorVector
infer_in
;
infer_in
.
insert
(
infer_in
.
end
(),
dense_out
.
begin
(),
dense_out
.
end
());
infer_in
.
insert
(
infer_in
.
end
(),
sparse_out
.
begin
(),
sparse_out
.
end
());
output_blob
->
SetBatchSize
(
batch_size
);
VLOG
(
2
)
<<
"infer batch size: "
<<
batch_size
;
Timer
timeline
;
int64_t
start
=
timeline
.
TimeStampUS
();
timeline
.
Start
();
if
(
InferManager
::
instance
().
infer
(
GENERAL_MODEL_NAME
,
&
infer_in
,
out
,
batch_size
))
{
LOG
(
ERROR
)
<<
"Failed do infer in fluid model: "
<<
GENERAL_MODEL_NAME
;
return
-
1
;
}
int64_t
end
=
timeline
.
TimeStampUS
();
CopyBlobInfo
(
input_blob
,
output_blob
);
AddBlobInfo
(
output_blob
,
start
);
AddBlobInfo
(
output_blob
,
end
);
return
0
;
}
DEFINE_OP
(
GeneralDistKVQuantInferOp
);
}
// namespace serving
}
// namespace paddle_serving
}
// namespace baidu
core/general-server/op/general_dist_kv_quant_infer_op.h
0 → 100644
浏览文件 @
24c78e3b
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#ifdef BCLOUD
#ifdef WITH_GPU
#include "paddle/paddle_inference_api.h"
#else
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
#include "paddle_inference_api.h" // NOLINT
#endif
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
namespace
baidu
{
namespace
paddle_serving
{
namespace
serving
{
// Op declaration for the distributed-KV inference step that reads the cube
// quantization setting; inference() is defined in the matching .cpp file.
class GeneralDistKVQuantInferOp
    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
 public:
  // Tensor list type exchanged with the inference engine.
  using TensorVector = std::vector<paddle::PaddleTensor>;

  DECLARE_OP(GeneralDistKVQuantInferOp);

  // Op entry point; returns an int status code.
  int inference();
};
}
// namespace serving
}
// namespace paddle_serving
}
// namespace baidu
core/predictor/framework/resource.cpp
浏览文件 @
24c78e3b
...
@@ -151,6 +151,18 @@ int Resource::initialize(const std::string& path, const std::string& file) {
...
@@ -151,6 +151,18 @@ int Resource::initialize(const std::string& path, const std::string& file) {
std
::
string
cube_config_fullpath
=
"./"
+
resource_conf
.
cube_config_path
()
+
std
::
string
cube_config_fullpath
=
"./"
+
resource_conf
.
cube_config_path
()
+
"/"
+
resource_conf
.
cube_config_file
();
"/"
+
resource_conf
.
cube_config_file
();
this
->
cube_config_fullpath
=
cube_config_fullpath
;
this
->
cube_config_fullpath
=
cube_config_fullpath
;
this
->
cube_quant_bits
=
resource_conf
.
has_cube_quant_bits
()
?
resource_conf
.
cube_quant_bits
()
:
0
;
if
(
this
->
cube_quant_bits
!=
0
&&
this
->
cube_quant_bits
!=
8
)
{
LOG
(
ERROR
)
<<
"Cube quant bits illegal! should be 0 or 8."
;
return
-
1
;
}
if
(
this
->
cube_quant_bits
==
0
)
{
LOG
(
INFO
)
<<
"cube quant mode OFF"
;
}
else
{
LOG
(
INFO
)
<<
"cube quant mode ON, quant bits: "
<<
this
->
cube_quant_bits
;
}
}
}
THREAD_SETSPECIFIC
(
_tls_bspec_key
,
NULL
);
THREAD_SETSPECIFIC
(
_tls_bspec_key
,
NULL
);
...
@@ -258,38 +270,6 @@ int Resource::general_model_initialize(const std::string& path,
...
@@ -258,38 +270,6 @@ int Resource::general_model_initialize(const std::string& path,
return
0
;
return
0
;
}
}
int
Resource
::
cube_initialize
(
const
std
::
string
&
path
,
const
std
::
string
&
file
)
{
// cube
if
(
!
FLAGS_enable_cube
)
{
return
0
;
}
ResourceConf
resource_conf
;
if
(
configure
::
read_proto_conf
(
path
,
file
,
&
resource_conf
)
!=
0
)
{
LOG
(
ERROR
)
<<
"Failed initialize resource from: "
<<
path
<<
"/"
<<
file
;
return
-
1
;
}
int
err
=
0
;
std
::
string
cube_config_file
=
resource_conf
.
cube_config_file
();
if
(
err
!=
0
)
{
LOG
(
ERROR
)
<<
"reade cube_config_file failed, path["
<<
path
<<
"], file["
<<
cube_config_file
<<
"]"
;
return
-
1
;
}
err
=
CubeAPI
::
instance
()
->
init
(
cube_config_file
.
c_str
());
if
(
err
!=
0
)
{
LOG
(
ERROR
)
<<
"failed initialize cube, config: "
<<
cube_config_file
<<
" error code : "
<<
err
;
return
-
1
;
}
LOG
(
INFO
)
<<
"Successfully initialize cube"
;
return
0
;
}
int
Resource
::
thread_initialize
()
{
int
Resource
::
thread_initialize
()
{
// mempool
// mempool
if
(
MempoolWrapper
::
instance
().
thread_initialize
()
!=
0
)
{
if
(
MempoolWrapper
::
instance
().
thread_initialize
()
!=
0
)
{
...
@@ -373,6 +353,7 @@ int Resource::thread_clear() {
...
@@ -373,6 +353,7 @@ int Resource::thread_clear() {
// ...
// ...
return
0
;
return
0
;
}
}
size_t
Resource
::
get_cube_quant_bits
()
{
return
this
->
cube_quant_bits
;
}
int
Resource
::
reload
()
{
int
Resource
::
reload
()
{
if
(
FLAGS_enable_model_toolkit
&&
InferManager
::
instance
().
reload
()
!=
0
)
{
if
(
FLAGS_enable_model_toolkit
&&
InferManager
::
instance
().
reload
()
!=
0
)
{
...
...
core/predictor/framework/resource.h
浏览文件 @
24c78e3b
...
@@ -82,7 +82,6 @@ class Resource {
...
@@ -82,7 +82,6 @@ class Resource {
}
}
int
initialize
(
const
std
::
string
&
path
,
const
std
::
string
&
file
);
int
initialize
(
const
std
::
string
&
path
,
const
std
::
string
&
file
);
int
cube_initialize
(
const
std
::
string
&
path
,
const
std
::
string
&
file
);
int
general_model_initialize
(
const
std
::
string
&
path
,
int
general_model_initialize
(
const
std
::
string
&
path
,
const
std
::
string
&
file
);
const
std
::
string
&
file
);
...
@@ -104,11 +103,13 @@ class Resource {
...
@@ -104,11 +103,13 @@ class Resource {
return
reinterpret_cast
<
DynamicResource
*>
(
return
reinterpret_cast
<
DynamicResource
*>
(
THREAD_GETSPECIFIC
(
_tls_bspec_key
));
THREAD_GETSPECIFIC
(
_tls_bspec_key
));
}
}
size_t
get_cube_quant_bits
();
private:
private:
int
thread_finalize
()
{
return
0
;
}
int
thread_finalize
()
{
return
0
;
}
std
::
shared_ptr
<
PaddleGeneralModelConfig
>
_config
;
std
::
shared_ptr
<
PaddleGeneralModelConfig
>
_config
;
std
::
string
cube_config_fullpath
;
std
::
string
cube_config_fullpath
;
int
cube_quant_bits
;
// 0 if no quant.
THREAD_KEY_T
_tls_bspec_key
;
THREAD_KEY_T
_tls_bspec_key
;
};
};
...
...
core/predictor/src/pdserving.cpp
浏览文件 @
24c78e3b
...
@@ -202,14 +202,6 @@ int main(int argc, char** argv) {
...
@@ -202,14 +202,6 @@ int main(int argc, char** argv) {
}
}
VLOG
(
2
)
<<
"Succ call pthread worker start function"
;
VLOG
(
2
)
<<
"Succ call pthread worker start function"
;
if
(
Resource
::
instance
().
cube_initialize
(
FLAGS_resource_path
,
FLAGS_resource_file
)
!=
0
)
{
LOG
(
ERROR
)
<<
"Failed initialize cube, conf: "
<<
FLAGS_resource_path
<<
"/"
<<
FLAGS_resource_file
;
return
-
1
;
}
VLOG
(
2
)
<<
"Succ initialize cube"
;
#ifndef BCLOUD
#ifndef BCLOUD
if
(
Resource
::
instance
().
general_model_initialize
(
FLAGS_resource_path
,
if
(
Resource
::
instance
().
general_model_initialize
(
FLAGS_resource_path
,
...
...
core/predictor/tools/CMakeLists.txt
浏览文件 @
24c78e3b
set
(
seq_gen_src
${
CMAKE_CURRENT_LIST_DIR
}
/seq_generator.cpp
${
CMAKE_CURRENT_LIST_DIR
}
/seq_file.cpp
)
set
(
seq_gen_src
${
CMAKE_CURRENT_LIST_DIR
}
/seq_generator.cpp
${
CMAKE_CURRENT_LIST_DIR
}
/seq_file.cpp
${
CMAKE_CURRENT_LIST_DIR
}
/quant.cpp
)
LIST
(
APPEND seq_gen_src
${
PROTO_SRCS
}
)
LIST
(
APPEND seq_gen_src
${
PROTO_SRCS
}
)
add_executable
(
seq_generator
${
seq_gen_src
}
)
add_executable
(
seq_generator
${
seq_gen_src
}
)
target_link_libraries
(
seq_generator protobuf -lpthread
)
target_link_libraries
(
seq_generator protobuf -lpthread
)
core/predictor/tools/quant.cpp
0 → 100644
浏览文件 @
24c78e3b
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "quant.h"
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include "seq_file.h"
using
paddle
::
framework
::
proto
::
VarType
;
// Returns the sum of squared element-wise differences between the first
// emb_size entries of a and b.
float compute_loss(float* a, float* b, int emb_size) {
  const size_t count = emb_size;
  float total = 0;
  size_t idx = 0;
  while (idx < count) {
    total += (a[idx] - b[idx]) * (a[idx] - b[idx]);
    ++idx;
  }
  return total;
}
// Simulates a quantize/de-quantize round trip for emb_size floats.
//
// Each in[i] is mapped to an integer level in [0, 2^bits - 1] relative to the
// range [min, max], then mapped back with a step of (max - min) / 2^bits and
// stored in out[i].
//
// The level is clamped while still a floating-point value, so NaN, infinite
// or very large ratios (for example when min == max) never reach the
// float-to-int conversion, where they would be undefined behaviour.
//
// Returns out.
float* transfer(
    float* in, float* out, float min, float max, int emb_size, int bits) {
  const double level_count = std::pow(2, bits);
  const int top_level = static_cast<int>(level_count) - 1;
  const float scale = (max - min) / level_count;
  for (int i = 0; i < emb_size; ++i) {
    const double scaled =
        std::round((in[i] - min) / (max - min) * (level_count - 1));
    int val = 0;
    if (scaled > top_level) {
      val = top_level;
    } else if (scaled > 0) {
      val = static_cast<int>(scaled);
    }  // NaN and non-positive results stay at level 0.
    out[i] = val * scale + min;
  }
  return out;
}
// Quantizes emb_size floats from `in` into `bits`-bit levels relative to the
// range [min, max] and stores one level per byte in the buffer *out.
//
// `out` is a pointer to the destination buffer pointer; the buffer must hold
// at least emb_size bytes. Levels above 127 are stored through a conversion
// to char.
//
// The previous code wrote through `*out[emb_size]`, which indexes the
// char** itself out of bounds; every level now goes to (*out)[i].
//
// Returns *out.
char* quant(
    float* in, char** out, float min, float max, int emb_size, int bits) {
  const double level_count = std::pow(2, bits);
  const int top_level = static_cast<int>(level_count) - 1;
  char* dst = *out;
  for (int i = 0; i < emb_size; ++i) {
    const double scaled =
        std::round((in[i] - min) / (max - min) * (level_count - 1));
    // Clamp before converting so NaN/inf/huge ratios never hit the
    // float-to-int conversion.
    int val = 0;
    if (scaled > top_level) {
      val = top_level;
    } else if (scaled > 0) {
      val = static_cast<int>(scaled);
    }
    dst[i] = static_cast<char>(val);
  }
  return *out;
}
// Expands emb_size quantized bytes from `in` into floats in `out`.
//
// Each byte is brought into [0, 2^bits) by adding 2^bits and taking the
// remainder, then multiplied by the step (max - min) / 2^bits and offset by
// min. Returns out.
float* dequant(
    char* in, float* out, float min, float max, int emb_size, int bits) {
  const float step = (max - min) / pow(2, bits);
  const size_t count = emb_size;
  if (count == 0) {
    return out;
  }
  const int modulus = (int)pow(2, bits);
  for (size_t pos = 0; pos < count; ++pos) {
    const int level = ((int)in[pos] + modulus) % modulus;
    out[pos] = step * level + min;
  }
  return out;
}
// Searches for a clipping range [xmin, xmax] for the emb_size values in `in`.
//
// Starts from the full data range and shrinks it from either end in steps of
// 1/200 of the initial span, using compute_loss(in, transfer(...)) as the
// score, until the working range has lost 16% of its initial span.
//
// Outputs:
//   xmin, xmax - selected range bounds
//   loss       - loss recorded for the selected range
//
// The scratch buffer lives on the heap (the original used a variable-length
// array, which is not standard C++ and is limited by the stack size).
void greedy_search(float* in,
                   float& xmin,
                   float& xmax,
                   float& loss,
                   size_t emb_size,
                   int bits) {
  const int b = 200;     // number of steps the initial span is divided into
  const float r = 0.16;  // fraction of the span that may be trimmed away
  xmin = 2147483647;
  xmax = -2147483648;
  for (size_t i = 0; i < emb_size; i++) {
    xmin = std::min(xmin, in[i]);
    xmax = std::max(xmax, in[i]);
  }
  float cur_min = xmin;
  float cur_max = xmax;

  std::unique_ptr<float[]> out(new float[emb_size]);
  loss = compute_loss(
      in, transfer(in, out.get(), cur_min, cur_max, emb_size, bits), emb_size);

  const float stepsize = (cur_max - cur_min) / b;
  const float min_steps = b * (1 - r) * stepsize;

  // Every pass trims one step off the working range, so the condition turns
  // false after about r * b passes. The explicit bound of b passes only
  // guards against a step that is too small to change cur_min/cur_max in
  // float arithmetic, which would otherwise loop forever.
  for (int pass = 0; pass < b && cur_min + min_steps < cur_max; ++pass) {
    const float loss_l = compute_loss(
        in,
        transfer(in, out.get(), cur_min + stepsize, cur_max, emb_size, bits),
        emb_size);
    const float loss_r = compute_loss(
        in,
        transfer(in, out.get(), cur_min, cur_max - stepsize, emb_size, bits),
        emb_size);
    if (loss_l < loss) {
      cur_min = cur_min + stepsize;
      if (loss_l < loss_r) {
        loss = loss_l;
        xmin = cur_min;
      }
    } else {
      cur_max = cur_max - stepsize;
      if (loss_r < loss) {
        loss = loss_r;
        xmax = cur_max;
      }
    }
  }
}
core/predictor/tools/quant.h
0 → 100644
浏览文件 @
24c78e3b
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include "core/predictor/framework.pb.h"
#include "seq_file.h"
using
paddle
::
framework
::
proto
::
VarType
;
// Sum of squared element-wise differences between the first emb_size entries
// of a and b.
float compute_loss(float* a, float* b, int emb_size);

// Quantize/de-quantize round trip of emb_size floats from `in` into `out`
// for the range [min, max] and the given bit width. Returns out.
float* transfer(
    float* in, float* out, float min, float max, int emb_size, int bits);

// Quantizes emb_size floats from `in` for the range [min, max] and the given
// bit width. `out` is a pointer to the destination buffer pointer.
// Returns *out.
char* quant(
    float* in, char** out, float min, float max, int emb_size, int bits);

// Expands emb_size quantized bytes from `in` into floats in `out` for the
// range [min, max] and the given bit width. Returns out.
float* dequant(
    char* in, float* out, float min, float max, int emb_size, int bits);

// Searches a clipping range for the emb_size values in `in`; the selected
// bounds are written to xmin/xmax and the associated loss to `loss`.
void greedy_search(float* in,
                   float& xmin,
                   float& xmax,
                   float& loss,
                   size_t emb_size,
                   int bits);
core/predictor/tools/seq_generator.cpp
浏览文件 @
24c78e3b
...
@@ -16,7 +16,9 @@
...
@@ -16,7 +16,9 @@
#include <iostream>
#include <iostream>
#include <memory>
#include <memory>
#include "core/predictor/framework.pb.h"
#include "core/predictor/framework.pb.h"
#include "quant.h"
#include "seq_file.h"
#include "seq_file.h"
using
paddle
::
framework
::
proto
::
VarType
;
using
paddle
::
framework
::
proto
::
VarType
;
std
::
map
<
int
,
size_t
>
var_type_size
;
std
::
map
<
int
,
size_t
>
var_type_size
;
void
reg_var_types
()
{
void
reg_var_types
()
{
...
@@ -31,6 +33,7 @@ void reg_var_types() {
...
@@ -31,6 +33,7 @@ void reg_var_types() {
var_type_size
[
static_cast
<
int
>
(
VarType
::
UINT8
)]
=
sizeof
(
uint8_t
);
var_type_size
[
static_cast
<
int
>
(
VarType
::
UINT8
)]
=
sizeof
(
uint8_t
);
var_type_size
[
static_cast
<
int
>
(
VarType
::
INT8
)]
=
sizeof
(
int8_t
);
var_type_size
[
static_cast
<
int
>
(
VarType
::
INT8
)]
=
sizeof
(
int8_t
);
}
}
int
dump_parameter
(
const
char
*
input_file
,
const
char
*
output_file
)
{
int
dump_parameter
(
const
char
*
input_file
,
const
char
*
output_file
)
{
std
::
ifstream
is
(
input_file
);
std
::
ifstream
is
(
input_file
);
// the 1st field, uint32_t version for LoDTensor
// the 1st field, uint32_t version for LoDTensor
...
@@ -105,12 +108,127 @@ int dump_parameter(const char *input_file, const char *output_file) {
...
@@ -105,12 +108,127 @@ int dump_parameter(const char *input_file, const char *output_file) {
}
}
return
0
;
return
0
;
}
}
// Reads a serialized 2-D parameter tensor from file1, quantizes every row and
// writes the rows to the sequence file file2.
//
// Input layout, in read order: uint32 version, uint64 LoD level count with
// the per-level data, a second uint32 version, an int32 descriptor size, the
// serialized VarType::TensorDesc, then the raw tensor data.
//
// Each output record uses the row index (int64_t) as key and, as value,
//   min (float), max (float), then one byte per row element,
// where min/max come from greedy_search() and the bytes are the quantization
// levels of the row for the given bit width.
//
// All buffers are owned by containers, so nothing leaks on any return path
// (the original never released its tensor and per-row buffers).
//
// NOTE(review): the tensor data is interpreted as float regardless of
// desc.data_type(); confirm callers only pass FP32 parameters.
//
// Returns 0 on success, -1 on failure.
int compress_parameter(const char *file1, const char *file2, int bits) {
  std::ifstream is(file1);
  if (!is.is_open()) {
    std::cout << "Cannot open parameter file " << file1 << std::endl;
    return -1;
  }

  // Step 1: is read version, os write version
  uint32_t version = 0;
  is.read(reinterpret_cast<char *>(&version), sizeof(version));
  if (version != 0) {
    std::cout << "Version number " << version << " not supported" << std::endl;
    return -1;
  }
  std::cout << "Version size: " << sizeof(version) << std::endl;

  // Step 2: is read LoD level, os write LoD level
  // The LoD data is not used below; it is only consumed from the stream.
  uint64_t lod_level = 0;
  is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
  for (uint64_t i = 0; i < lod_level; ++i) {
    uint64_t size = 0;
    is.read(reinterpret_cast<char *>(&size), sizeof(size));
    // Round up so a byte count that is not a multiple of sizeof(size_t)
    // cannot overrun the buffer.
    std::vector<size_t> tmp((size + sizeof(size_t) - 1) / sizeof(size_t));
    is.read(reinterpret_cast<char *>(tmp.data()),
            static_cast<std::streamsize>(size));
  }

  // Step 3: is read Protobuf os Write Protobuf
  // Note: duplicate version field
  is.read(reinterpret_cast<char *>(&version), sizeof(version));
  if (version != 0) {
    std::cout << "Version number " << version << " not supported" << std::endl;
    return -1;
  }

  // Step 4: is read Tensor Data, os write min/max/quant data
  VarType::TensorDesc desc;
  int32_t size = 0;
  is.read(reinterpret_cast<char *>(&size), sizeof(size));
  if (!is || size < 0) {
    std::cout << "Cannot parse tensor desc" << std::endl;
    return -1;
  }
  std::unique_ptr<char[]> buf(new char[size]);
  is.read(reinterpret_cast<char *>(buf.get()), size);
  if (!desc.ParseFromArray(buf.get(), size)) {
    std::cout << "Cannot parse tensor desc" << std::endl;
    return -1;
  }

  // read tensor
  std::vector<int64_t> dims;
  dims.reserve(static_cast<size_t>(desc.dims().size()));
  std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));

  std::cout << "Dims:";
  for (auto x : dims) {
    std::cout << " " << x;
  }
  std::cout << std::endl;

  if (dims.size() != 2) {
    std::cout << "Parameter dims not 2D" << std::endl;
    return -1;
  }

  size_t numel = 1;
  for (auto x : dims) {
    numel *= x;
  }
  const size_t buf_size = numel * var_type_size[desc.data_type()];
  std::cout << buf_size << std::endl;

  std::vector<char> tensor_buf(buf_size);
  is.read(tensor_buf.data(), buf_size);
  if (!is) {
    std::cout << "Failed to read tensor data from " << file1 << std::endl;
    return -1;
  }
  float *tensor_float_buf = reinterpret_cast<float *>(tensor_buf.data());

  const size_t header_bytes = 2 * sizeof(float);
  const size_t row_len = dims[1];
  const size_t per_line_size = row_len * 1 + header_bytes;
  const double level_count = pow(2, bits);
  const int top_level = static_cast<int>(level_count) - 1;

  std::cout << "Start Quant" << std::endl;
  SeqFileWriter seq_file_writer(file2);

  // One record buffer reused for every row.
  std::vector<char> row_out(per_line_size);
  for (int64_t i = 0; i < dims[0]; ++i) {
    float xmin = 0, xmax = 0, loss = 0;
    float *row_in = tensor_float_buf + i * dims[1];
    greedy_search(row_in, xmin, xmax, loss, row_len, bits);

    memcpy(row_out.data(), &xmin, sizeof(float));
    memcpy(row_out.data() + sizeof(float), &xmax, sizeof(float));

    char *levels = row_out.data() + header_bytes;
    for (size_t e = 0; e < row_len; ++e) {
      const double scaled =
          round((row_in[e] - xmin) / (xmax - xmin) * (level_count - 1));
      // Clamp before converting so NaN/inf (e.g. xmax == xmin) never reach
      // the float-to-int conversion.
      int val = 0;
      if (scaled > top_level) {
        val = top_level;
      } else if (scaled > 0) {
        val = static_cast<int>(scaled);
      }
      levels[e] = val;
    }
    seq_file_writer.write(reinterpret_cast<char *>(&i),
                          sizeof(i),
                          row_out.data(),
                          per_line_size);
  }
  return 0;
}
int
main
(
int
argc
,
char
**
argv
)
{
int
main
(
int
argc
,
char
**
argv
)
{
if
(
argc
!=
3
)
{
if
(
argc
<
3
||
argc
>
4
)
{
std
::
cout
<<
"Usage: seq_generator PARAMETER_FILE OUTPUT_FILE"
<<
std
::
endl
;
std
::
cout
<<
"Usage: if no compress, please follow:"
<<
std
::
endl
;
std
::
cout
<<
"seq_generator PARAMETER_FILE OUTPUT_FILE
\n
"
<<
std
::
endl
;
std
::
cout
<<
"if compress, please follow: "
<<
std
::
endl
;
std
::
cout
<<
"seq_generator PARAMETER_FILE OUTPUT_FILE QUANT_BITS"
<<
std
::
endl
;
std
::
cout
<<
"Now it only support 8 bit."
<<
std
::
endl
;
return
-
1
;
return
-
1
;
}
}
reg_var_types
();
reg_var_types
();
dump_parameter
(
argv
[
1
],
argv
[
2
]);
if
(
argc
==
3
)
{
std
::
cout
<<
"generate normal sparse param sequence file"
<<
std
::
endl
;
dump_parameter
(
argv
[
1
],
argv
[
2
]);
return
0
;
}
if
(
argc
==
4
)
{
std
::
cout
<<
"generate compressed sparse param sequence file"
<<
std
::
endl
;
compress_parameter
(
argv
[
1
],
argv
[
2
],
atoi
(
argv
[
3
]));
return
0
;
}
}
}
/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */
/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */
python/examples/criteo_ctr_with_cube/clean.sh
浏览文件 @
24c78e3b
ps
-ef
|
grep
cube |
awk
{
'print $2'
}
| xargs
kill
-9
ps
-ef
|
grep
cube |
awk
{
'print $2'
}
| xargs
kill
-9
ps
-ef
|
grep
SimpleHTTPServer |
awk
{
'print $2'
}
| xargs
kill
-9
rm
-rf
cube/cube_data cube/data cube/log
*
cube/nohup
*
cube/output/ cube/donefile cube/input cube/monitor cube/cube-builder.INFO
rm
-rf
cube/cube_data cube/data cube/log
*
cube/nohup
*
cube/output/ cube/donefile cube/input cube/monitor cube/cube-builder.INFO
ps
-ef
|
grep test
|
awk
{
'print $2'
}
| xargs
kill
-9
ps
-ef
|
grep test
|
awk
{
'print $2'
}
| xargs
kill
-9
ps
-ef
|
grep
serving |
awk
{
'print $2'
}
| xargs
kill
-9
ps
-ef
|
grep
serving |
awk
{
'print $2'
}
| xargs
kill
-9
python/examples/criteo_ctr_with_cube/cube_prepare.sh
浏览文件 @
24c78e3b
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
#! /bin/bash
mkdir
-p
cube_model
mkdir
-p
cube_model
mkdir
-p
cube/data
mkdir
-p
cube/data
./seq_generator ctr_serving_model/SparseFeatFactors ./cube_model/feature
./seq_generator ctr_serving_model/SparseFeatFactors ./cube_model/feature
./cube/cube-builder
-dict_name
=
test_dict
-job_mode
=
base
-last_version
=
0
-cur_version
=
0
-depend_version
=
0
-input_path
=
./cube_model
-output_path
=
./cube/data
-shard_num
=
1
-only_build
=
false
./cube/cube-builder
-dict_name
=
test_dict
-job_mode
=
base
-last_version
=
0
-cur_version
=
0
-depend_version
=
0
-input_path
=
./cube_model
-output_path
=
./cube/data
-shard_num
=
1
-only_build
=
false
mv
./cube/data/0_0/test_dict_part0/
*
./cube/data/
mv
./cube/data/0_0/test_dict_part0/
*
./cube/data/
cd
cube
&&
./cube
cd
cube
&&
./cube
python/examples/criteo_ctr_with_cube/cube_quant_prepare.sh
0 → 100755
浏览文件 @
24c78e3b
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
#! /bin/bash
# Working directories for the generated sequence file and the cube data.
mkdir -p cube_model cube/data

# Generate the sparse parameter sequence file; the trailing 8 is the
# QUANT_BITS argument of seq_generator.
./seq_generator ctr_serving_model/SparseFeatFactors ./cube_model/feature 8

# Build the cube dictionary from the generated sequence file.
./cube/cube-builder \
    -dict_name=test_dict \
    -job_mode=base \
    -last_version=0 \
    -cur_version=0 \
    -depend_version=0 \
    -input_path=./cube_model \
    -output_path=./cube/data \
    -shard_num=1 \
    -only_build=false

# Move the built shard files up into cube/data, then start cube from its
# own directory.
mv ./cube/data/0_0/test_dict_part0/* ./cube/data/
cd cube && ./cube
python/examples/criteo_ctr_with_cube/test_server_quant.py
0 → 100755
浏览文件 @
24c78e3b
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import
os
import
sys
from
paddle_serving_server
import
OpMaker
from
paddle_serving_server
import
OpSeqMaker
from
paddle_serving_server
import
Server
# Build the op pipeline: reader -> quantized dist-kv inference -> response.
op_maker = OpMaker()
pipeline_ops = [
    op_maker.create(op_name)
    for op_name in ('general_reader', 'general_dist_kv_quant_infer',
                    'general_response')
]

op_seq_maker = OpSeqMaker()
for pipeline_op in pipeline_ops:
    op_seq_maker.add_op(pipeline_op)

# Configure the server; the model config path is the first CLI argument.
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(4)
server.load_model_config(sys.argv[1])
server.prepare_server(workdir="work_dir1", port=9292, device="cpu")
server.run_server()
python/paddle_serving_server/__init__.py
浏览文件 @
24c78e3b
...
@@ -33,7 +33,7 @@ class OpMaker(object):
...
@@ -33,7 +33,7 @@ class OpMaker(object):
"general_text_response"
:
"GeneralTextResponseOp"
,
"general_text_response"
:
"GeneralTextResponseOp"
,
"general_single_kv"
:
"GeneralSingleKVOp"
,
"general_single_kv"
:
"GeneralSingleKVOp"
,
"general_dist_kv_infer"
:
"GeneralDistKVInferOp"
,
"general_dist_kv_infer"
:
"GeneralDistKVInferOp"
,
"general_dist_kv
"
:
"GeneralDistKV
Op"
,
"general_dist_kv
_quant_infer"
:
"GeneralDistKVQuantInfer
Op"
,
"general_copy"
:
"GeneralCopyOp"
"general_copy"
:
"GeneralCopyOp"
}
}
...
@@ -164,6 +164,8 @@ class Server(object):
...
@@ -164,6 +164,8 @@ class Server(object):
if
"dist_kv"
in
node
.
name
:
if
"dist_kv"
in
node
.
name
:
self
.
resource_conf
.
cube_config_path
=
workdir
self
.
resource_conf
.
cube_config_path
=
workdir
self
.
resource_conf
.
cube_config_file
=
self
.
cube_config_fn
self
.
resource_conf
.
cube_config_file
=
self
.
cube_config_fn
if
"quant"
in
node
.
name
:
self
.
resource_conf
.
cube_quant_bits
=
8
self
.
resource_conf
.
model_toolkit_path
=
workdir
self
.
resource_conf
.
model_toolkit_path
=
workdir
self
.
resource_conf
.
model_toolkit_file
=
self
.
model_toolkit_fn
self
.
resource_conf
.
model_toolkit_file
=
self
.
model_toolkit_fn
self
.
resource_conf
.
general_model_path
=
workdir
self
.
resource_conf
.
general_model_path
=
workdir
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录