Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Serving
提交
aad622c5
S
Serving
项目概览
PaddlePaddle
/
Serving
大约 1 年 前同步成功
通知
186
Star
833
Fork
253
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
105
列表
看板
标记
里程碑
合并请求
10
Wiki
2
Wiki
分析
仓库
DevOps
项目成员
Pages
S
Serving
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
105
Issue
105
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
2
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
aad622c5
编写于
11月 15, 2019
作者:
W
wangguibao
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into elastic_ctr
上级
c59b9cc9
9978f48a
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
257 addition
and
56 deletion
+257
-56
Dockerfile
Dockerfile
+26
-0
Dockerfile.gpu
Dockerfile.gpu
+11
-0
cube/cube-api/src/cube_cli.cpp
cube/cube-api/src/cube_cli.cpp
+106
-40
demo-client/src/bert_service.cpp
demo-client/src/bert_service.cpp
+4
-4
demo-serving/op/bert_service_op.cpp
demo-serving/op/bert_service_op.cpp
+31
-6
demo-serving/proto/bert_service.proto
demo-serving/proto/bert_service.proto
+7
-3
doc/DOCKER.md
doc/DOCKER.md
+63
-0
doc/INSTALL.md
doc/INSTALL.md
+2
-0
sdk-cpp/proto/bert_service.proto
sdk-cpp/proto/bert_service.proto
+7
-3
未找到文件。
Dockerfile
0 → 100644
浏览文件 @
aad622c5
FROM
centos:centos6.10
RUN
export
http_proxy
=
http://172.19.56.199:3128
\
&&
export
https_proxy
=
http://172.19.56.199:3128
\
&&
yum
-y
install
wget
\
&&
wget http://people.centos.org/tru/devtools-2/devtools-2.repo
-O
/etc/yum.repos.d/devtoolset-2.repo
\
&&
yum
-y
install
devtoolset-2-gcc devtoolset-2-gcc-c++ devtoolset-2-binutils
\
&&
source
/opt/rh/devtoolset-2/enable
\
&&
echo
"source /opt/rh/devtoolset-2/enable"
>>
/etc/profile
\
&&
yum
-y
install
git openssl-devel curl-devel bzip2-devel
\
&&
wget https://cmake.org/files/v3.5/cmake-3.5.2.tar.gz
\
&&
tar
xvf cmake-3.5.2.tar.gz
\
&&
cd
cmake-3.5.2
\
&&
./bootstrap
--prefix
=
/usr
\
&&
make
\
&&
make
install
\
&&
cd
..
\
&&
rm
-r
cmake-3.5.2
*
\
&&
wget https://dl.google.com/go/go1.12.12.linux-amd64.tar.gz
\
&&
tar
-xzvf
go1.12.12.linux-amd64.tar.gz
\
&&
mv
go /usr/local/go
\
&&
rm
go1.12.12.linux-amd64.tar.gz
\
&&
echo
"export GOROOT=/usr/local/go"
>>
/root/.bashrc
\
&&
echo
"export GOPATH=
$HOME
/go"
>>
/root/.bashrc
\
&&
echo
"export PATH=
$PATH
:/usr/local/go/bin"
>>
/root/.bashrc
Dockerfile.gpu
0 → 100644
浏览文件 @
aad622c5
FROM paddlepaddle/paddle_manylinux_devel:cuda9.0_cudnn7
RUN yum -y install git openssl-devel curl-devel bzip2-devel \
&& wget https://dl.google.com/go/go1.12.12.linux-amd64.tar.gz \
&& tar -xzvf go1.12.12.linux-amd64.tar.gz \
&& rm -rf /usr/local/go \
&& mv go /usr/local/go \
&& rm go1.12.12.linux-amd64.tar.gz \
&& echo "GOROOT=/usr/local/go" >> /root/.bashrc \
&& echo "GOPATH=$HOME/go" >> /root/.bashrc \
&& echo "PATH=$PATH:$GOROOT/bin" >> /root/.bashrc
cube/cube-api/src/cube_cli.cpp
浏览文件 @
aad622c5
...
...
@@ -14,9 +14,9 @@
#include <gflags/gflags.h>
#include <atomic>
#include <fstream>
#include <thread> //NOLINT
#include "cube/cube-api/include/cube_api.h"
#define TIME_FLAG(flag) \
struct timeval flag; \
gettimeofday(&(flag), NULL);
...
...
@@ -28,6 +28,11 @@ DEFINE_uint64(batch, 500, "batch size");
DEFINE_int32
(
timeout
,
200
,
"timeout in ms"
);
DEFINE_int32
(
retry
,
3
,
"retry times"
);
DEFINE_bool
(
print_output
,
false
,
"print output flag"
);
DEFINE_int32
(
thread_num
,
1
,
"thread num"
);
std
::
atomic
<
int
>
g_concurrency
(
0
);
std
::
vector
<
uint64_t
>
time_list
;
std
::
vector
<
uint64_t
>
request_list
;
namespace
{
inline
uint64_t
time_diff
(
const
struct
timeval
&
start_time
,
...
...
@@ -53,7 +58,7 @@ std::string string_to_hex(const std::string& input) {
return
output
;
}
int
run
(
int
argc
,
char
**
argv
)
{
int
run
(
int
argc
,
char
**
argv
,
int
thread_id
)
{
google
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
true
);
CubeAPI
*
cube
=
CubeAPI
::
instance
();
...
...
@@ -62,13 +67,13 @@ int run(int argc, char** argv) {
LOG
(
ERROR
)
<<
"init cube api failed err="
<<
ret
;
return
ret
;
}
FILE
*
key_file
=
fopen
(
FLAGS_keys
.
c_str
(),
"r"
);
if
(
key_file
==
NULL
)
{
LOG
(
ERROR
)
<<
"open key file ["
<<
FLAGS_keys
<<
"] failed"
;
return
-
1
;
}
/*
FILE* key_file = fopen(FLAGS_keys.c_str(), "r");
if (key_file == NULL) {
LOG(ERROR) << "open key file [" << FLAGS_keys << "] failed";
return -1;
}
*/
std
::
atomic
<
uint64_t
>
seek_counter
(
0
);
std
::
atomic
<
uint64_t
>
seek_cost_total
(
0
);
uint64_t
seek_cost_max
=
0
;
...
...
@@ -78,14 +83,32 @@ int run(int argc, char** argv) {
std
::
vector
<
uint64_t
>
keys
;
std
::
vector
<
CubeValue
>
values
;
while
(
fgets
(
buffer
,
1024
,
key_file
))
{
uint64_t
key
=
strtoul
(
buffer
,
NULL
,
10
);
keys
.
push_back
(
key
);
std
::
string
line
;
std
::
vector
<
int64_t
>
key_list
;
std
::
ifstream
key_file
(
FLAGS_keys
.
c_str
());
while
(
getline
(
key_file
,
line
))
{
key_list
.
push_back
(
std
::
stoll
(
line
));
}
uint64_t
file_size
=
key_list
.
size
();
uint64_t
index
=
0
;
uint64_t
request
=
0
;
while
(
g_concurrency
.
load
()
>=
FLAGS_thread_num
)
{
}
g_concurrency
++
;
while
(
index
<
file_size
)
{
// uint64_t key = strtoul(buffer, NULL, 10);
keys
.
push_back
(
key_list
[
index
]);
index
+=
1
;
int
ret
=
0
;
if
(
keys
.
size
()
>
FLAGS_batch
)
{
TIME_FLAG
(
seek_start
);
ret
=
cube
->
seek
(
FLAGS_dict
,
keys
,
&
values
);
TIME_FLAG
(
seek_end
);
request
+=
1
;
if
(
ret
!=
0
)
{
LOG
(
WARNING
)
<<
"cube seek failed"
;
}
else
if
(
FLAGS_print_output
)
{
...
...
@@ -110,37 +133,40 @@ int run(int argc, char** argv) {
values
.
clear
();
}
}
/*
if (keys.size() > 0) {
int ret = 0;
values.resize(keys.size());
TIME_FLAG(seek_start);
ret = cube->seek(FLAGS_dict, keys, &values);
TIME_FLAG(seek_end);
if (ret != 0) {
LOG(WARNING) << "cube seek failed";
} else if (FLAGS_print_output) {
for (size_t i = 0; i < keys.size(); ++i) {
fprintf(stdout,
"key:%lu value:%s\n",
keys[i],
string_to_hex(values[i].buff).c_str());
}
}
if
(
keys
.
size
()
>
0
)
{
int
ret
=
0
;
values
.
resize
(
keys
.
size
());
TIME_FLAG
(
seek_start
);
ret
=
cube
->
seek
(
FLAGS_dict
,
keys
,
&
values
);
TIME_FLAG
(
seek_end
);
if
(
ret
!=
0
)
{
LOG
(
WARNING
)
<<
"cube seek failed"
;
}
else
if
(
FLAGS_print_output
)
{
for
(
size_t
i
=
0
;
i
<
keys
.
size
();
++
i
)
{
fprintf
(
stdout
,
"key:%lu value:%s
\n
"
,
keys
[
i
],
string_to_hex
(
values
[
i
].
buff
).
c_str
());
++seek_counter;
uint64_t seek_cost = time_diff(seek_start, seek_end);
seek_cost_total += seek_cost;
if (seek_cost > seek_cost_max) {
seek_cost_max = seek_cost;
}
if (seek_cost < seek_cost_min) {
seek_cost_min = seek_cost;
}
}
*/
g_concurrency
--
;
++
seek_counter
;
uint64_t
seek_cost
=
time_diff
(
seek_start
,
seek_end
);
seek_cost_total
+=
seek_cost
;
if
(
seek_cost
>
seek_cost_max
)
{
seek_cost_max
=
seek_cost
;
}
if
(
seek_cost
<
seek_cost_min
)
{
seek_cost_min
=
seek_cost
;
}
}
fclose
(
key_file
);
// fclose(key_file);
ret
=
cube
->
destroy
();
//
ret = cube->destroy();
if
(
ret
!=
0
)
{
LOG
(
WARNING
)
<<
"destroy cube api failed err="
<<
ret
;
}
...
...
@@ -150,10 +176,50 @@ int run(int argc, char** argv) {
LOG
(
INFO
)
<<
"seek cost max = "
<<
seek_cost_max
;
LOG
(
INFO
)
<<
"seek cost min = "
<<
seek_cost_min
;
time_list
[
thread_id
]
=
seek_cost_avg
;
request_list
[
thread_id
]
=
request
;
return
0
;
}
int
run_m
(
int
argc
,
char
**
argv
)
{
google
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
true
);
int
thread_num
=
FLAGS_thread_num
;
request_list
.
resize
(
thread_num
);
time_list
.
resize
(
thread_num
);
std
::
vector
<
std
::
thread
*>
thread_pool
;
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
thread_pool
.
push_back
(
new
std
::
thread
(
run
,
argc
,
argv
,
i
));
}
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
thread_pool
[
i
]
->
join
();
delete
thread_pool
[
i
];
}
uint64_t
sum_time
=
0
;
uint64_t
max_time
=
0
;
uint64_t
min_time
=
1000000
;
uint64_t
request_num
=
0
;
for
(
int
i
=
0
;
i
<
thread_num
;
i
++
)
{
sum_time
+=
time_list
[
i
];
if
(
time_list
[
i
]
>
max_time
)
{
max_time
=
time_list
[
i
];
}
if
(
time_list
[
i
]
<
min_time
)
{
min_time
=
time_list
[
i
];
}
request_num
+=
request_list
[
i
];
}
uint64_t
mean_time
=
sum_time
/
thread_num
;
LOG
(
INFO
)
<<
thread_num
<<
" thread seek cost"
<<
" avg = "
<<
std
::
to_string
(
mean_time
)
<<
" max = "
<<
std
::
to_string
(
max_time
)
<<
" min = "
<<
std
::
to_string
(
min_time
);
LOG
(
INFO
)
<<
" total_request = "
<<
std
::
to_string
(
request_num
)
<<
" speed = "
<<
std
::
to_string
(
1000000
*
thread_num
/
mean_time
)
<<
" query per second"
;
}
}
// namespace mcube
}
// namespace rec
int
main
(
int
argc
,
char
**
argv
)
{
return
::
rec
::
mcube
::
run
(
argc
,
argv
);
}
int
main
(
int
argc
,
char
**
argv
)
{
return
::
rec
::
mcube
::
run
_m
(
argc
,
argv
);
}
demo-client/src/bert_service.cpp
浏览文件 @
aad622c5
...
...
@@ -93,9 +93,9 @@ int create_req(Request* req,
ins
->
add_input_masks
(
0.0
);
}
}
ins
->
set_max_seq_len
(
max_seq_len
);
ins
->
set_emb_size
(
emb_size
);
}
req
->
set_max_seq_len
(
max_seq_len
);
req
->
set_emb_size
(
emb_size
);
return
0
;
}
...
...
@@ -118,8 +118,8 @@ void print_res(const Request& req,
oss
<<
"]
\n
"
;
LOG
(
INFO
)
<<
"Receive : "
<<
oss
.
str
();
}
LOG
(
INFO
)
<<
"Succ call predictor[
ctr_prediction_service], the tag is: "
<<
route_tag
<<
", elapse_ms: "
<<
elapse_ms
;
LOG
(
INFO
)
<<
"Succ call predictor[
bert_service], the tag is: "
<<
route_tag
<<
", elapse_ms: "
<<
elapse_ms
;
}
void
thread_worker
(
PredictorApi
*
api
,
...
...
demo-serving/op/bert_service_op.cpp
浏览文件 @
aad622c5
...
...
@@ -28,6 +28,21 @@ using baidu::paddle_serving::predictor::bert_service::BertReqInstance;
using
baidu
::
paddle_serving
::
predictor
::
bert_service
::
Request
;
using
baidu
::
paddle_serving
::
predictor
::
bert_service
::
EmbeddingValues
;
std
::
vector
<
std
::
string
>
split
(
const
std
::
string
&
str
,
const
std
::
string
&
pattern
)
{
std
::
vector
<
std
::
string
>
res
;
if
(
str
==
""
)
return
res
;
std
::
string
strs
=
str
+
pattern
;
size_t
pos
=
strs
.
find
(
pattern
);
while
(
pos
!=
strs
.
npos
)
{
std
::
string
temp
=
strs
.
substr
(
0
,
pos
);
res
.
push_back
(
temp
);
strs
=
strs
.
substr
(
pos
+
1
,
strs
.
size
());
pos
=
strs
.
find
(
pattern
);
}
return
res
;
}
int
BertServiceOp
::
inference
()
{
timeval
op_start
;
gettimeofday
(
&
op_start
,
NULL
);
...
...
@@ -43,17 +58,27 @@ int BertServiceOp::inference() {
return
0
;
}
const
int64_t
MAX_SEQ_LEN
=
req
->
instances
(
0
).
max_seq_len
();
const
int64_t
EMB_SIZE
=
req
->
instances
(
0
).
emb_size
();
const
int64_t
MAX_SEQ_LEN
=
req
->
max_seq_len
();
const
int64_t
EMB_SIZE
=
req
->
emb_size
();
paddle
::
PaddleTensor
src_ids
;
paddle
::
PaddleTensor
pos_ids
;
paddle
::
PaddleTensor
seg_ids
;
paddle
::
PaddleTensor
input_masks
;
src_ids
.
name
=
std
::
string
(
"src_ids"
);
pos_ids
.
name
=
std
::
string
(
"pos_ids"
);
seg_ids
.
name
=
std
::
string
(
"sent_ids"
);
input_masks
.
name
=
std
::
string
(
"input_mask"
);
if
(
req
->
has_feed_var_names
())
{
// support paddlehub model
std
::
vector
<
std
::
string
>
feed_list
=
split
(
req
->
feed_var_names
(),
";"
);
src_ids
.
name
=
feed_list
[
0
];
pos_ids
.
name
=
feed_list
[
1
];
seg_ids
.
name
=
feed_list
[
2
];
input_masks
.
name
=
feed_list
[
3
];
}
else
{
src_ids
.
name
=
std
::
string
(
"src_ids"
);
pos_ids
.
name
=
std
::
string
(
"pos_ids"
);
seg_ids
.
name
=
std
::
string
(
"sent_ids"
);
input_masks
.
name
=
std
::
string
(
"input_mask"
);
}
src_ids
.
dtype
=
paddle
::
PaddleDType
::
INT64
;
src_ids
.
shape
=
{
batch_size
,
MAX_SEQ_LEN
,
1
};
...
...
demo-serving/proto/bert_service.proto
浏览文件 @
aad622c5
...
...
@@ -25,11 +25,15 @@ message BertReqInstance {
repeated
int64
sentence_type_ids
=
2
;
repeated
int64
position_ids
=
3
;
repeated
float
input_masks
=
4
;
optional
int64
max_seq_len
=
5
;
optional
int64
emb_size
=
6
;
};
message
Request
{
repeated
BertReqInstance
instances
=
1
;
};
message
Request
{
repeated
BertReqInstance
instances
=
1
;
optional
int64
max_seq_len
=
2
;
optional
int64
emb_size
=
3
;
optional
string
feed_var_names
=
4
;
optional
string
fetch_var_names
=
5
;
};
message
EmbeddingValues
{
repeated
float
values
=
1
;
};
...
...
doc/DOCKER.md
0 → 100644
浏览文件 @
aad622c5
# 使用Docker编译Paddle Serving
## Docker编译环境要求
+
开发机上已安装Docker
+
编译GPU版本需要安装nvidia-docker
[
CPU版本Dockerfile
](
../Dockerfile
)
[
GPU版本Dockerfile
](
../Dockerfile.gpu
)
## 使用方法
### 构建Docker镜像
建立新目录,复制Dockerfile内容到该目录下Dockerfile文件
执行
```
bash
docker build
-t
serving_compile:cpu .
```
或者
```
bash
docker build
-t
serving_compile:cuda9 .
```
## 进入Docker
CPU版本请执行
```
bash
docker run
-it
serving_compile:cpu bash
```
GPU版本请执行
```
bash
docker run
-it
--runtime
=
nvidia
-it
serving_compile:cuda9 bash
```
## 预编译文件可执行环境列表
| Docker预编译版本可运行环境 |
| -------------------------- |
| Centos6 |
| Centos7 |
| Ubuntu16.04 |
| Ubuntu 18.04 |
| GPU Docker预编译版本支持的CUDA版本 |
| ---------------------------------- |
| cuda8_cudnn7 |
| cuda9_cudnn7 |
| cuda10_cudnn7 |
**备注:**
+
若执行预编译版本出现找不到libcrypto.so.10、libssl.so.10的情况,可以将Docker环境中的/usr/lib64/libssl.so.10与/usr/lib64/libcrypto.so.10复制到可执行文件所在目录。
+
CPU预编译版本仅可在CPU机器上执行,GPU预编译版本仅可在GPU机器上执行
doc/INSTALL.md
浏览文件 @
aad622c5
...
...
@@ -16,6 +16,8 @@ openssl & openssl-devel
## 编译
推荐使用Docker编译Paddle Serving,
[
Docker编译使用说明
](
./DOCKER.md
)
```
shell
$
git clone https://github.com/PaddlePaddle/serving.git
$
cd
serving
...
...
sdk-cpp/proto/bert_service.proto
浏览文件 @
aad622c5
...
...
@@ -25,11 +25,15 @@ message BertReqInstance {
repeated
int64
sentence_type_ids
=
2
;
repeated
int64
position_ids
=
3
;
repeated
float
input_masks
=
4
;
optional
int64
max_seq_len
=
5
;
optional
int64
emb_size
=
6
;
};
message
Request
{
repeated
BertReqInstance
instances
=
1
;
};
message
Request
{
repeated
BertReqInstance
instances
=
1
;
optional
int64
max_seq_len
=
2
;
optional
int64
emb_size
=
3
;
optional
string
feed_var_names
=
4
;
optional
string
fetch_var_names
=
5
;
};
message
EmbeddingValues
{
repeated
float
values
=
1
;
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录