Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Serving
提交
1ced0a1c
S
Serving
项目概览
PaddlePaddle
/
Serving
1 年多 前同步成功
通知
186
Star
833
Fork
253
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
105
列表
看板
标记
里程碑
合并请求
10
Wiki
2
Wiki
分析
仓库
DevOps
项目成员
Pages
S
Serving
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
105
Issue
105
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
2
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1ced0a1c
编写于
7月 14, 2021
作者:
Z
Zhang Yulong
提交者:
GitHub
7月 14, 2021
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into ci-test
上级
58415334
ac9f69fb
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
167 addition
and
124 deletion
+167
-124
core/predictor/framework/infer.cpp
core/predictor/framework/infer.cpp
+2
-1
python/paddle_serving_client/client.py
python/paddle_serving_client/client.py
+2
-2
python/paddle_serving_server/serve.py
python/paddle_serving_server/serve.py
+72
-46
python/paddle_serving_server/server.py
python/paddle_serving_server/server.py
+33
-21
python/paddle_serving_server/web_service.py
python/paddle_serving_server/web_service.py
+58
-54
未找到文件。
core/predictor/framework/infer.cpp
100755 → 100644
浏览文件 @
1ced0a1c
...
...
@@ -391,7 +391,8 @@ int InferManager::proc_initialize(const char* path,
return
-
1
;
}
uint32_t
engine_num
=
model_toolkit_conf
.
engines_size
();
im
::
bsf
::
TaskExecutorVector
<
TaskT
>::
instance
().
resize
(
*
engine_index_ptr
+
engine_num
);
im
::
bsf
::
TaskExecutorVector
<
TaskT
>::
instance
().
resize
(
*
engine_index_ptr
+
engine_num
);
for
(
uint32_t
ei
=
0
;
ei
<
engine_num
;
++
ei
)
{
LOG
(
INFO
)
<<
"model_toolkit_conf.engines("
<<
ei
<<
").name: "
<<
model_toolkit_conf
.
engines
(
ei
).
name
();
...
...
python/paddle_serving_client/client.py
浏览文件 @
1ced0a1c
...
...
@@ -79,7 +79,7 @@ class SDKConfig(object):
self
.
tag_list
=
[]
self
.
cluster_list
=
[]
self
.
variant_weight_list
=
[]
self
.
rpc_timeout_ms
=
20000
self
.
rpc_timeout_ms
=
20000
0
self
.
load_balance_strategy
=
"la"
def
add_server_variant
(
self
,
tag
,
cluster
,
variant_weight
):
...
...
@@ -142,7 +142,7 @@ class Client(object):
self
.
profile_
=
_Profiler
()
self
.
all_numpy_input
=
True
self
.
has_numpy_input
=
False
self
.
rpc_timeout_ms
=
20000
self
.
rpc_timeout_ms
=
20000
0
from
.serving_client
import
PredictorRes
self
.
predictorres_constructor
=
PredictorRes
...
...
python/paddle_serving_server/serve.py
浏览文件 @
1ced0a1c
...
...
@@ -31,6 +31,67 @@ elif sys.version_info.major == 3:
from
http.server
import
BaseHTTPRequestHandler
,
HTTPServer
def
format_gpu_to_strlist
(
unformatted_gpus
):
gpus_strlist
=
[]
if
isinstance
(
unformatted_gpus
,
int
):
gpus_strlist
=
[
str
(
unformatted_gpus
)]
elif
isinstance
(
unformatted_gpus
,
list
):
if
unformatted_gpus
==
[
""
]:
gpus_strlist
=
[
"-1"
]
elif
len
(
unformatted_gpus
)
==
0
:
gpus_strlist
=
[
"-1"
]
else
:
gpus_strlist
=
[
str
(
x
)
for
x
in
unformatted_gpus
]
elif
isinstance
(
unformatted_gpus
,
str
):
if
unformatted_gpus
==
""
:
gpus_strlist
=
[
"-1"
]
else
:
gpus_strlist
=
[
unformatted_gpus
]
elif
unformatted_gpus
==
None
:
gpus_strlist
=
[
"-1"
]
else
:
raise
ValueError
(
"error input of set_gpus"
)
# check cuda visible
if
"CUDA_VISIBLE_DEVICES"
in
os
.
environ
:
env_gpus
=
os
.
environ
[
"CUDA_VISIBLE_DEVICES"
].
split
(
","
)
for
op_gpus_str
in
gpus_strlist
:
op_gpu_list
=
op_gpus_str
.
split
(
","
)
# op_gpu_list == ["-1"] means this op use CPU
# so don`t check cudavisible.
if
op_gpu_list
==
[
"-1"
]:
continue
for
ids
in
op_gpu_list
:
if
ids
not
in
env_gpus
:
print
(
"gpu_ids is not in CUDA_VISIBLE_DEVICES."
)
exit
(
-
1
)
# check gpuid is valid
for
op_gpus_str
in
gpus_strlist
:
op_gpu_list
=
op_gpus_str
.
split
(
","
)
use_gpu
=
False
for
ids
in
op_gpu_list
:
if
int
(
ids
)
<
-
1
:
raise
ValueError
(
"The input of gpuid error."
)
if
int
(
ids
)
>=
0
:
use_gpu
=
True
if
int
(
ids
)
==
-
1
and
use_gpu
:
raise
ValueError
(
"You can not use CPU and GPU in one model."
)
return
gpus_strlist
def
is_gpu_mode
(
unformatted_gpus
):
gpus_strlist
=
format_gpu_to_strlist
(
unformatted_gpus
)
for
op_gpus_str
in
gpus_strlist
:
op_gpu_list
=
op_gpus_str
.
split
(
","
)
for
ids
in
op_gpu_list
:
if
int
(
ids
)
>=
0
:
return
True
return
False
def
serve_args
():
parser
=
argparse
.
ArgumentParser
(
"serve"
)
parser
.
add_argument
(
...
...
@@ -38,7 +99,7 @@ def serve_args():
parser
.
add_argument
(
"--port"
,
type
=
int
,
default
=
9292
,
help
=
"Port of the starting gpu"
)
parser
.
add_argument
(
"--device"
,
type
=
str
,
default
=
"
g
pu"
,
help
=
"Type of device"
)
"--device"
,
type
=
str
,
default
=
"
c
pu"
,
help
=
"Type of device"
)
parser
.
add_argument
(
"--gpu_ids"
,
type
=
str
,
default
=
""
,
nargs
=
"+"
,
help
=
"gpu ids"
)
parser
.
add_argument
(
...
...
@@ -118,9 +179,9 @@ def serve_args():
def
start_gpu_card_model
(
gpu_mode
,
port
,
args
):
# pylint: disable=doc-string-missing
device
=
"
g
pu"
if
gpu_mode
==
Fals
e
:
device
=
"
c
pu"
device
=
"
c
pu"
if
gpu_mode
==
Tru
e
:
device
=
"
g
pu"
thread_num
=
args
.
thread
model
=
args
.
model
...
...
@@ -211,34 +272,15 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi
def
start_multi_card
(
args
,
serving_port
=
None
):
# pylint: disable=doc-string-missing
gpus
=
[]
if
serving_port
==
None
:
serving_port
=
args
.
port
if
args
.
gpu_ids
==
""
:
gpus
=
[]
else
:
#check the gpu_id is valid or not.
gpus
=
args
.
gpu_ids
if
isinstance
(
gpus
,
str
):
gpus
=
[
gpus
]
if
"CUDA_VISIBLE_DEVICES"
in
os
.
environ
:
env_gpus
=
os
.
environ
[
"CUDA_VISIBLE_DEVICES"
].
split
(
","
)
for
op_gpus_str
in
gpus
:
op_gpu_list
=
op_gpus_str
.
split
(
","
)
for
ids
in
op_gpu_list
:
if
ids
not
in
env_gpus
:
print
(
"gpu_ids is not in CUDA_VISIBLE_DEVICES."
)
exit
(
-
1
)
if
args
.
use_lite
:
print
(
"run using paddle-lite."
)
start_gpu_card_model
(
False
,
serving_port
,
args
)
elif
len
(
gpus
)
<=
0
:
print
(
"gpu_ids not set, going to run cpu service."
)
start_gpu_card_model
(
False
,
serving_port
,
args
)
else
:
start_gpu_card_model
(
True
,
serving_port
,
args
)
start_gpu_card_model
(
is_gpu_mode
(
args
.
gpu_ids
)
,
serving_port
,
args
)
class
MainService
(
BaseHTTPRequestHandler
):
...
...
@@ -320,7 +362,9 @@ class MainService(BaseHTTPRequestHandler):
if
__name__
==
"__main__"
:
# args.device is not used at all.
# just keep the interface.
# so --device should not be recommended at the HomePage.
args
=
serve_args
()
for
single_model_config
in
args
.
model
:
if
os
.
path
.
isdir
(
single_model_config
):
...
...
@@ -346,29 +390,10 @@ if __name__ == "__main__":
web_service
=
WebService
(
name
=
args
.
name
)
web_service
.
load_model_config
(
args
.
model
)
if
args
.
gpu_ids
==
""
:
gpus
=
[]
else
:
#check the gpu_id is valid or not.
gpus
=
args
.
gpu_ids
if
isinstance
(
gpus
,
str
):
gpus
=
[
gpus
]
if
"CUDA_VISIBLE_DEVICES"
in
os
.
environ
:
env_gpus
=
os
.
environ
[
"CUDA_VISIBLE_DEVICES"
].
split
(
","
)
for
op_gpus_str
in
gpus
:
op_gpu_list
=
op_gpus_str
.
split
(
","
)
for
ids
in
op_gpu_list
:
if
ids
not
in
env_gpus
:
print
(
"gpu_ids is not in CUDA_VISIBLE_DEVICES."
)
exit
(
-
1
)
if
len
(
gpus
)
>
0
:
web_service
.
set_gpus
(
gpus
)
workdir
=
"{}_{}"
.
format
(
args
.
workdir
,
args
.
port
)
web_service
.
prepare_server
(
workdir
=
workdir
,
port
=
args
.
port
,
device
=
args
.
device
,
use_lite
=
args
.
use_lite
,
use_xpu
=
args
.
use_xpu
,
ir_optim
=
args
.
ir_optim
,
...
...
@@ -378,7 +403,8 @@ if __name__ == "__main__":
use_trt
=
args
.
use_trt
,
gpu_multi_stream
=
args
.
gpu_multi_stream
,
op_num
=
args
.
op_num
,
op_max_batch
=
args
.
op_max_batch
)
op_max_batch
=
args
.
op_max_batch
,
gpuid
=
args
.
gpu_ids
)
web_service
.
run_rpc_service
()
app_instance
=
Flask
(
__name__
)
...
...
python/paddle_serving_server/server.py
浏览文件 @
1ced0a1c
...
...
@@ -17,6 +17,7 @@ import tarfile
import
socket
import
paddle_serving_server
as
paddle_serving_server
from
paddle_serving_server.rpc_service
import
MultiLangServerServiceServicer
from
paddle_serving_server.serve
import
format_gpu_to_strlist
from
.proto
import
server_configure_pb2
as
server_sdk
from
.proto
import
general_model_config_pb2
as
m_config
from
.proto
import
multi_lang_general_model_service_pb2_grpc
...
...
@@ -171,12 +172,7 @@ class Server(object):
self
.
device
=
device
def
set_gpuid
(
self
,
gpuid
):
if
isinstance
(
gpuid
,
int
):
self
.
gpuid
=
str
(
gpuid
)
elif
isinstance
(
gpuid
,
list
):
self
.
gpuid
=
[
str
(
x
)
for
x
in
gpuid
]
else
:
self
.
gpuid
=
gpuid
self
.
gpuid
=
format_gpu_to_strlist
(
gpuid
)
def
set_op_num
(
self
,
op_num
):
self
.
op_num
=
op_num
...
...
@@ -197,23 +193,20 @@ class Server(object):
self
.
use_xpu
=
True
def
_prepare_engine
(
self
,
model_config_paths
,
device
,
use_encryption_model
):
self
.
device
=
device
if
self
.
model_toolkit_conf
==
None
:
self
.
model_toolkit_conf
=
[]
self
.
device
=
device
# Generally, self.gpuid = str[] or str.
# such as "0" or ["0"] or ["0,1"] or ["0,1" , "1,2"]
if
isinstance
(
self
.
gpuid
,
str
):
self
.
gpuid
=
[
self
.
gpuid
]
# Generally, self.gpuid = str[] or [].
# when len(self.gpuid) means no gpuid is specified.
# if self.device == "gpu" or self.use_trt:
# we assume you forget to set gpuid, so set gpuid = ['0'];
if
len
(
self
.
gpuid
)
==
0
:
if
self
.
device
==
"gpu"
or
self
.
use_trt
:
self
.
gpuid
.
append
(
"0"
)
if
len
(
self
.
gpuid
)
==
0
or
self
.
gpuid
==
[
"-1"
]:
if
self
.
device
==
"gpu"
or
self
.
use_trt
or
self
.
gpu_multi_stream
:
self
.
gpuid
=
[
"0"
]
self
.
device
=
"gpu"
else
:
self
.
gpuid
.
append
(
"-1"
)
self
.
gpuid
=
[
"-1"
]
if
isinstance
(
self
.
op_num
,
int
):
self
.
op_num
=
[
self
.
op_num
]
...
...
@@ -254,12 +247,14 @@ class Server(object):
for
ids
in
op_gpu_list
:
engine
.
gpu_ids
.
extend
([
int
(
ids
)])
if
self
.
device
==
"gpu"
or
self
.
use_trt
:
if
self
.
device
==
"gpu"
or
self
.
use_trt
or
self
.
gpu_multi_stream
:
engine
.
use_gpu
=
True
# this is for Mixed use of GPU and CPU
# if model-1 use GPU and set the device="gpu"
# but gpuid[1] = "-1" which means use CPU in Model-2
# so config about GPU should be False.
# op_gpu_list = gpuid[index].split(",")
# which is the gpuid for each engine.
if
len
(
op_gpu_list
)
==
1
:
if
int
(
op_gpu_list
[
0
])
==
-
1
:
engine
.
use_gpu
=
False
...
...
@@ -500,10 +495,17 @@ class Server(object):
def
prepare_server
(
self
,
workdir
=
None
,
port
=
9292
,
device
=
"cpu"
,
device
=
None
,
use_encryption_model
=
False
,
cube_conf
=
None
):
self
.
device
=
device
# if `device` is not set, use self.device
# self.device may not be changed.
# or self.device may have changed by set_device.
if
device
==
None
:
device
=
self
.
device
# if `device` is set, let self.device = device.
else
:
self
.
device
=
device
if
workdir
==
None
:
workdir
=
"./tmp"
os
.
system
(
"mkdir -p {}"
.
format
(
workdir
))
...
...
@@ -602,6 +604,7 @@ class MultiLangServer(object):
self
.
body_size_
=
64
*
1024
*
1024
self
.
concurrency_
=
100000
self
.
is_multi_model_
=
False
# for model ensemble, which is not useful right now.
self
.
device
=
"cpu"
# this is the default value for multilang `device`.
def
set_max_concurrency
(
self
,
concurrency
):
self
.
concurrency_
=
concurrency
...
...
@@ -609,6 +612,7 @@ class MultiLangServer(object):
def
set_device
(
self
,
device
=
"cpu"
):
self
.
device
=
device
self
.
bserver_
.
set_device
(
device
)
def
set_num_threads
(
self
,
threads
):
self
.
worker_num_
=
threads
...
...
@@ -727,10 +731,18 @@ class MultiLangServer(object):
def
prepare_server
(
self
,
workdir
=
None
,
port
=
9292
,
device
=
"cpu"
,
device
=
None
,
use_encryption_model
=
False
,
cube_conf
=
None
):
self
.
device
=
device
# if `device` is not set, use self.device
# self.device may not be changed.
# or self.device may have changed by set_device.
if
device
==
None
:
device
=
self
.
device
# if `device` is set, let self.device = device.
else
:
self
.
device
=
device
if
not
self
.
_port_is_available
(
port
):
raise
SystemExit
(
"Port {} is already used"
.
format
(
port
))
default_port
=
12000
...
...
python/paddle_serving_server/web_service.py
浏览文件 @
1ced0a1c
...
...
@@ -26,6 +26,7 @@ import numpy as np
import
os
from
paddle_serving_server
import
pipeline
from
paddle_serving_server.pipeline
import
Op
from
paddle_serving_server.serve
import
format_gpu_to_strlist
def
port_is_available
(
port
):
...
...
@@ -44,7 +45,7 @@ class WebService(object):
# pipeline
self
.
_server
=
pipeline
.
PipelineServer
(
self
.
name
)
self
.
gpus
=
[]
# deprecated
self
.
gpus
=
[
"-1"
]
# deprecated
self
.
rpc_service_list
=
[]
# deprecated
def
get_pipeline_response
(
self
,
read_op
):
...
...
@@ -103,19 +104,24 @@ class WebService(object):
if
client_config_path
==
None
:
self
.
client_config_path
=
file_path_list
# after this function, self.gpus should be a list of str or [].
def
set_gpus
(
self
,
gpus
):
print
(
"This API will be deprecated later. Please do not use it"
)
if
isinstance
(
gpus
,
int
):
self
.
gpus
=
str
(
gpus
)
elif
isinstance
(
gpus
,
list
):
self
.
gpus
=
[
str
(
x
)
for
x
in
gpus
]
else
:
self
.
gpus
=
gpus
self
.
gpus
=
format_gpu_to_strlist
(
gpus
)
# this function can be called by user
# or by Function create_rpc_config
# if by user, user can set_gpus or pass the `gpus`
# if `gpus` == None, which means it`s not set at all.
# at this time, we should use self.gpus instead.
# otherwise, we should use the `gpus` first.
# which means if set_gpus and `gpus` is both set.
# `gpus` will be used.
def
default_rpc_service
(
self
,
workdir
,
port
=
9292
,
gpus
=
-
1
,
gpus
=
None
,
thread_num
=
2
,
mem_optim
=
True
,
use_lite
=
False
,
...
...
@@ -127,16 +133,25 @@ class WebService(object):
gpu_multi_stream
=
False
,
op_num
=
None
,
op_max_batch
=
None
):
device
=
"gpu"
device
=
"cpu"
server
=
Server
()
# only when `gpus == None`, which means it`s not set at all
# we will use the self.gpus.
if
gpus
==
None
:
gpus
=
self
.
gpus
gpus
=
format_gpu_to_strlist
(
gpus
)
server
.
set_gpuid
(
gpus
)
if
gpus
==
-
1
or
gpus
==
"-1"
:
if
len
(
gpus
)
==
0
or
gpus
==
[
"-1"
]
:
if
use_lite
:
device
=
"arm"
else
:
device
=
"cpu"
else
:
server
.
set_gpuid
(
gpus
)
device
=
"gpu"
op_maker
=
OpMaker
()
op_seq_maker
=
OpSeqMaker
()
...
...
@@ -190,45 +205,31 @@ class WebService(object):
def
_launch_rpc_service
(
self
,
service_idx
):
self
.
rpc_service_list
[
service_idx
].
run_server
()
# if use this function, self.gpus must be set before.
# if not, we will use the default value, self.gpus = ["-1"].
# so we always pass the `gpus` = self.gpus.
def
create_rpc_config
(
self
):
if
len
(
self
.
gpus
)
==
0
:
# init cpu service
self
.
rpc_service_list
.
append
(
self
.
default_rpc_service
(
self
.
workdir
,
self
.
port_list
[
0
],
-
1
,
thread_num
=
self
.
thread_num
,
mem_optim
=
self
.
mem_optim
,
use_lite
=
self
.
use_lite
,
use_xpu
=
self
.
use_xpu
,
ir_optim
=
self
.
ir_optim
,
precision
=
self
.
precision
,
use_calib
=
self
.
use_calib
,
op_num
=
self
.
op_num
,
op_max_batch
=
self
.
op_max_batch
))
else
:
self
.
rpc_service_list
.
append
(
self
.
default_rpc_service
(
self
.
workdir
,
self
.
port_list
[
0
],
self
.
gpus
,
thread_num
=
self
.
thread_num
,
mem_optim
=
self
.
mem_optim
,
use_lite
=
self
.
use_lite
,
use_xpu
=
self
.
use_xpu
,
ir_optim
=
self
.
ir_optim
,
precision
=
self
.
precision
,
use_calib
=
self
.
use_calib
,
use_trt
=
self
.
use_trt
,
gpu_multi_stream
=
self
.
gpu_multi_stream
,
op_num
=
self
.
op_num
,
op_max_batch
=
self
.
op_max_batch
))
self
.
rpc_service_list
.
append
(
self
.
default_rpc_service
(
self
.
workdir
,
self
.
port_list
[
0
],
self
.
gpus
,
thread_num
=
self
.
thread_num
,
mem_optim
=
self
.
mem_optim
,
use_lite
=
self
.
use_lite
,
use_xpu
=
self
.
use_xpu
,
ir_optim
=
self
.
ir_optim
,
precision
=
self
.
precision
,
use_calib
=
self
.
use_calib
,
use_trt
=
self
.
use_trt
,
gpu_multi_stream
=
self
.
gpu_multi_stream
,
op_num
=
self
.
op_num
,
op_max_batch
=
self
.
op_max_batch
))
def
prepare_server
(
self
,
workdir
,
port
=
9393
,
device
=
"
g
pu"
,
device
=
"
c
pu"
,
precision
=
"fp32"
,
use_calib
=
False
,
use_lite
=
False
,
...
...
@@ -240,12 +241,13 @@ class WebService(object):
gpu_multi_stream
=
False
,
op_num
=
None
,
op_max_batch
=
None
,
gpuid
=
-
1
):
gpuid
=
None
):
print
(
"This API will be deprecated later. Please do not use it"
)
self
.
workdir
=
workdir
self
.
port
=
port
self
.
thread_num
=
thread_num
self
.
device
=
device
# self.device is not used at all.
# device is set by gpuid.
self
.
precision
=
precision
self
.
use_calib
=
use_calib
self
.
use_lite
=
use_lite
...
...
@@ -257,12 +259,14 @@ class WebService(object):
self
.
gpu_multi_stream
=
gpu_multi_stream
self
.
op_num
=
op_num
self
.
op_max_batch
=
op_max_batch
if
isinstance
(
gpuid
,
int
):
self
.
gpus
=
str
(
gpuid
)
elif
isinstance
(
gpuid
,
list
):
self
.
gpus
=
[
str
(
x
)
for
x
in
gpuid
]
# if gpuid != None, we will use gpuid first.
# otherwise, keep the self.gpus unchanged.
# maybe self.gpus is set by the Function set_gpus.
if
gpuid
!=
None
:
self
.
gpus
=
format_gpu_to_strlist
(
gpuid
)
else
:
self
.
gpus
=
gpuid
pass
default_port
=
12000
for
i
in
range
(
1000
):
...
...
@@ -359,8 +363,8 @@ class WebService(object):
if
gpu
:
# if user forget to call function `set_gpus` to set self.gpus.
# default self.gpus = [0].
if
len
(
self
.
gpus
)
==
0
:
self
.
gpus
.
append
(
0
)
if
len
(
self
.
gpus
)
==
0
or
self
.
gpus
==
[
"-1"
]
:
self
.
gpus
=
[
"0"
]
# right now, local Predictor only support 1 card.
# no matter how many gpu_id is in gpus, we only use the first one.
gpu_id
=
(
self
.
gpus
[
0
].
split
(
","
))[
0
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录