Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Serving
提交
4de287c6
S
Serving
项目概览
PaddlePaddle
/
Serving
1 年多 前同步成功
通知
186
Star
833
Fork
253
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
105
列表
看板
标记
里程碑
合并请求
10
Wiki
2
Wiki
分析
仓库
DevOps
项目成员
Pages
S
Serving
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
105
Issue
105
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
2
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4de287c6
编写于
5月 14, 2022
作者:
T
TeslaZhao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix Paddle 2.3 compile error
上级
48305205
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
267 addition
and
8 deletion
+267
-8
cmake/paddlepaddle.cmake
cmake/paddlepaddle.cmake
+12
-0
core/configure/proto/server_configure.proto
core/configure/proto/server_configure.proto
+14
-0
core/general-server/op/general_remote_op.cpp
core/general-server/op/general_remote_op.cpp
+126
-0
core/general-server/op/general_remote_op.h
core/general-server/op/general_remote_op.h
+58
-0
core/predictor/common/constant.cpp
core/predictor/common/constant.cpp
+1
-1
core/predictor/framework/bsf-inl.h
core/predictor/framework/bsf-inl.h
+1
-1
paddle_inference/paddle/include/paddle_engine.h
paddle_inference/paddle/include/paddle_engine.h
+10
-6
python/paddle_serving_server/serve.py
python/paddle_serving_server/serve.py
+25
-0
python/paddle_serving_server/server.py
python/paddle_serving_server/server.py
+20
-0
未找到文件。
cmake/paddlepaddle.cmake
浏览文件 @
4de287c6
...
...
@@ -171,11 +171,23 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
PADDLE_INSTALL_DIR
}
/third_party/install/mkldnn/lib"
)
LINK_DIRECTORIES
(
${
PADDLE_INSTALL_DIR
}
/third_party/install/mkldnn/lib
)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
PADDLE_INSTALL_DIR
}
/third_party/install/paddle2onnx/lib"
)
LINK_DIRECTORIES
(
${
PADDLE_INSTALL_DIR
}
/third_party/install/paddle2onnx/lib
)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
PADDLE_INSTALL_DIR
}
/third_party/install/onnxruntime/lib"
)
LINK_DIRECTORIES
(
${
PADDLE_INSTALL_DIR
}
/third_party/install/onnxruntime/lib
)
if
(
NOT WITH_MKLML
)
ADD_LIBRARY
(
openblas STATIC IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET openblas PROPERTY IMPORTED_LOCATION
${
PADDLE_INSTALL_DIR
}
/third_party/install/openblas/lib/libopenblas.a
)
endif
()
ADD_LIBRARY
(
paddle2onnx STATIC IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET paddle2onnx PROPERTY IMPORTED_LOCATION
${
PADDLE_INSTALL_DIR
}
/third_party/install/paddle2onnx/lib/libpaddle2onnx.so
)
ADD_LIBRARY
(
onnxruntime STATIC IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET onnxruntime PROPERTY IMPORTED_LOCATION
${
PADDLE_INSTALL_DIR
}
/third_party/install/onnxruntime/lib/libonnxruntime.so.1.10.0
)
ADD_LIBRARY
(
paddle_inference STATIC IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET paddle_inference PROPERTY IMPORTED_LOCATION
${
PADDLE_INSTALL_DIR
}
/lib/libpaddle_inference.so
)
if
(
WITH_ASCEND_CL
)
...
...
core/configure/proto/server_configure.proto
浏览文件 @
4de287c6
...
...
@@ -49,6 +49,20 @@ message EngineDesc {
optional
bool
gpu_multi_stream
=
20
;
optional
bool
use_ascend_cl
=
21
;
/*
* "gpu_memory_mb": allocate gpu memory by config.EnableUseGpu()
* "cpu_math_thread_num": set thread numbers of cpu math by
* config.SetCpuMathLibraryNumThreads()
* "trt_workspace_size": set TensorRT workspace size by
* config.EnableTensorRtEngine(), 1 << 25 default
* "trt_use_static": If true, save the optimization information of the TRT
* serialized to the disk, and load from the disk.
*/
optional
int32
gpu_memory_mb
=
22
[
default
=
100
];
optional
int32
cpu_math_thread_num
=
23
[
default
=
1
];
optional
int32
trt_workspace_size
=
24
[
default
=
33554432
];
optional
bool
trt_use_static
=
25
[
default
=
false
];
/*
* "runtime_thread_num": n == 0 means don`t use Asynchronous task scheduling
* mode.
...
...
core/general-server/op/general_remote_op.cpp
0 → 100644
浏览文件 @
4de287c6
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_remote_op.h"
#include <iostream>
#include <sstream>
#include "core/util/include/timer.h"
// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
// will support: FLOAT16
// Largest brpc message body accepted by this process: 2 GiB.
// The product is spelled with an unsigned 64-bit literal because
// 2 * 1024 * 1024 * 1024 evaluated in plain int overflows, and the expression
// is parenthesized so the macro expands safely inside larger expressions.
#define BRPC_MAX_BODY_SIZE (2ULL * 1024 * 1024 * 1024)
// Load-balancer name handed to brpc::Channel::Init(); intentionally empty.
// NOTE(review): an empty name presumably means "single server, no load
// balancing" in brpc -- confirm against the brpc client documentation.
const std::string LODABALANCE = "";
namespace
baidu
{
namespace
paddle_serving
{
namespace
serving
{
using
baidu
::
paddle_serving
::
Timer
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Tensor
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Request
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Response
;
// Storage for the static members declared in class BRPCStub.

// One channel slot per remote endpoint, MAX_MP_NUM slots in total.
brpc::Channel BRPCStub::brpc_channels[MAX_MP_NUM];

// Channel options applied to every slot when the channels are initialized.
brpc::ChannelOptions BRPCStub::options;

// Initialization state of the channels; starts at 0 (nothing initialized).
std::atomic<int> BRPCStub::inited(0);
int
GeneralRemoteOp
::
inference
()
{
LOG
(
INFO
)
<<
"Enter GeneralRemoteOp:inference()"
;
int
expected
=
0
;
std
::
vector
<
std
::
string
>
op_address
=
address
();
if
(
BRPCStub
::
inited
.
compare_exchange_strong
(
expected
,
1
))
{
BRPCStub
::
options
.
protocol
=
"baidu_std"
;
BRPCStub
::
options
.
connection_type
=
"short"
;
BRPCStub
::
options
.
timeout_ms
=
80000
/*milliseconds*/
;
BRPCStub
::
options
.
max_retry
=
100
;
brpc
::
fLU64
::
FLAGS_max_body_size
=
BRPC_MAX_BODY_SIZE
;
LOG
(
ERROR
)
<<
"address size: "
<<
op_address
.
size
();
for
(
int
i
=
0
;
i
<
op_address
.
size
();
++
i
)
{
LOG
(
INFO
)
<<
i
+
1
<<
" address is "
<<
op_address
[
i
].
c_str
();
BRPCStub
::
brpc_channels
[
i
].
Init
(
op_address
[
i
].
c_str
(),
LODABALANCE
.
c_str
(),
&
BRPCStub
::
options
);
}
BRPCStub
::
inited
++
;
}
while
(
BRPCStub
::
inited
<
2
)
{
}
Timer
timeline
;
int64_t
start
=
timeline
.
TimeStampUS
();
timeline
.
Start
();
VLOG
(
2
)
<<
"Going to run Remote inference"
;
Request
*
req
=
(
Request
*
)(
get_request_message
());
Response
*
res
=
mutable_data
<
Response
>
();
uint64_t
log_id
=
req
->
log_id
();
brpc
::
Controller
brpc_controllers
[
MAX_MP_NUM
];
brpc
::
CallId
brpc_callids
[
MAX_MP_NUM
];
Response
brpc_response_tmp
;
size_t
i
=
0
;
// Init BRPC controllers, callids and stubs
for
(
i
=
0
;
i
<
op_address
.
size
();
++
i
)
{
brpc_controllers
[
i
].
set_log_id
(
log_id
);
brpc_callids
[
i
]
=
brpc_controllers
[
i
].
call_id
();
}
for
(
i
=
0
;
i
<
op_address
.
size
();
++
i
)
{
baidu
::
paddle_serving
::
predictor
::
general_model
::
GeneralModelService_Stub
stub
(
&
BRPCStub
::
brpc_channels
[
i
]);
LOG
(
INFO
)
<<
"Sended 1 request to Slave Sever "
<<
i
;
if
(
0
==
i
)
{
stub
.
inference
(
&
brpc_controllers
[
i
],
req
,
res
,
brpc
::
DoNothing
());
continue
;
}
stub
.
inference
(
&
brpc_controllers
[
i
],
req
,
&
brpc_response_tmp
,
brpc
::
DoNothing
());
}
LOG
(
INFO
)
<<
"All request are sended, waiting for all responses."
;
// Wait RPC done.
for
(
i
=
0
;
i
<
op_address
.
size
();
++
i
)
{
brpc
::
Join
(
brpc_callids
[
i
]);
}
// Print RPC Results
for
(
i
=
0
;
i
<
op_address
.
size
();
++
i
)
{
LOG
(
INFO
)
<<
"brpc_controller_"
<<
i
<<
" status:"
<<
brpc_controllers
[
i
].
Failed
();
if
(
!
brpc_controllers
[
i
].
Failed
())
{
LOG
(
INFO
)
<<
"Received response from "
<<
brpc_controllers
[
i
].
remote_side
()
<<
" Latency="
<<
brpc_controllers
[
i
].
latency_us
()
<<
"us"
;
}
else
{
LOG
(
ERROR
)
<<
brpc_controllers
[
i
].
ErrorText
();
}
}
LOG
(
INFO
)
<<
"All brpc remote stubs joined done."
;
res
->
set_log_id
(
log_id
);
res
->
set_profile_server
(
req
->
profile_server
());
int64_t
end
=
timeline
.
TimeStampUS
();
res
->
add_profile_time
(
start
);
res
->
add_profile_time
(
end
);
return
0
;
}
// Counterpart of DECLARE_OP(GeneralRemoteOp) in the class declaration.
// NOTE(review): presumably registers the op with the serving framework --
// confirm against the DEFINE_OP macro definition.
DEFINE_OP
(
GeneralRemoteOp
);
}
// namespace serving
}
// namespace paddle_serving
}
// namespace baidu
core/general-server/op/general_remote_op.h
0 → 100644
浏览文件 @
4de287c6
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <brpc/channel.h>
#include <butil/logging.h>
#include <butil/time.h>
#include <gflags/gflags.h>
#include <atomic>
#include <memory>
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/general_model_service.pb.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/predictor_sdk.h"
// Capacity of BRPCStub::brpc_channels and of the per-call controller and
// call-id arrays used by GeneralRemoteOp::inference().
#define MAX_MP_NUM 16
namespace
baidu
{
namespace
paddle_serving
{
namespace
serving
{
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Request
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Response
;
// Op built on predictor::OpWithChannel with general_model::Response as its
// data type. inference() is implemented in general_remote_op.cpp.
class GeneralRemoteOp
    : public baidu::paddle_serving::predictor::OpWithChannel<Response> {
 public:
  DECLARE_OP(GeneralRemoteOp);

  // Runs the op; returns an int status code.
  int inference();
};
// Holder for the brpc client state shared through static members.
class BRPCStub {
 public:
  // Channel slots, MAX_MP_NUM in total.
  static brpc::Channel brpc_channels[MAX_MP_NUM];

  // Options passed to Channel::Init() for every slot.
  static brpc::ChannelOptions options;

  // Channel initialization state counter.
  static std::atomic<int> inited;
};
}
// namespace serving
}
// namespace paddle_serving
}
// namespace baidu
core/predictor/common/constant.cpp
浏览文件 @
4de287c6
...
...
@@ -20,7 +20,7 @@ namespace predictor {
DEFINE_bool
(
use_parallel_infer_service
,
false
,
""
);
DEFINE_int32
(
el_log_level
,
16
,
""
);
DEFINE_int32
(
idle_timeout_s
,
16
,
""
);
DEFINE_int32
(
idle_timeout_s
,
80
,
""
);
DEFINE_int32
(
port
,
8010
,
""
);
DEFINE_string
(
workflow_path
,
"./conf"
,
""
);
DEFINE_string
(
workflow_file
,
"workflow.prototxt"
,
""
);
...
...
core/predictor/framework/bsf-inl.h
浏览文件 @
4de287c6
...
...
@@ -341,7 +341,7 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
LOG
(
INFO
)
<<
"Hit auto padding, merge "
<<
padding_task_count
<<
" tasks into 1 batch."
;
}
LOG
(
INFO
)
<<
"Number of tasks remaining in _task_queue is"
LOG
(
INFO
)
<<
"Number of tasks remaining in _task_queue is
"
<<
_task_queue
.
size
();
return
true
;
}
...
...
paddle_inference/paddle/include/paddle_engine.h
浏览文件 @
4de287c6
...
...
@@ -241,10 +241,10 @@ class PaddleInferenceEngine : public EngineCore {
}
config
.
SwitchSpecifyInputNames
(
true
);
config
.
SetCpuMathLibraryNumThreads
(
1
);
config
.
SetCpuMathLibraryNumThreads
(
engine_conf
.
cpu_math_thread_num
()
);
if
(
engine_conf
.
has_use_gpu
()
&&
engine_conf
.
use_gpu
())
{
// 2000MB GPU memory
config
.
EnableUseGpu
(
50
,
gpu_id
);
config
.
EnableUseGpu
(
engine_conf
.
gpu_memory_mb
()
,
gpu_id
);
if
(
engine_conf
.
has_gpu_multi_stream
()
&&
engine_conf
.
gpu_multi_stream
())
{
config
.
EnableGpuMultiStream
();
...
...
@@ -267,17 +267,17 @@ class PaddleInferenceEngine : public EngineCore {
if
(
engine_conf
.
has_use_trt
()
&&
engine_conf
.
use_trt
())
{
config
.
SwitchIrOptim
(
true
);
if
(
!
engine_conf
.
has_use_gpu
()
||
!
engine_conf
.
use_gpu
())
{
config
.
EnableUseGpu
(
50
,
gpu_id
);
config
.
EnableUseGpu
(
engine_conf
.
gpu_memory_mb
()
,
gpu_id
);
if
(
engine_conf
.
has_gpu_multi_stream
()
&&
engine_conf
.
gpu_multi_stream
())
{
config
.
EnableGpuMultiStream
();
}
}
config
.
EnableTensorRtEngine
(
1
<<
25
,
config
.
EnableTensorRtEngine
(
engine_conf
.
trt_workspace_size
()
,
max_batch
,
local_min_subgraph_size
,
precision_type
,
false
,
engine_conf
.
trt_use_static
()
,
FLAGS_use_calib
);
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
min_input_shape
;
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
;
...
...
@@ -413,7 +413,11 @@ class PaddleInferenceEngine : public EngineCore {
<<
", use_ascend_cl: "
<<
engine_conf
.
has_use_ascend_cl
()
<<
", use_xpu: "
<<
engine_conf
.
use_xpu
()
<<
", enable_memory_optimization: "
<<
engine_conf
.
enable_memory_optimization
();
<<
engine_conf
.
enable_memory_optimization
()
<<
", gpu_memory_mb: "
<<
engine_conf
.
gpu_memory_mb
()
<<
", cpu_math_thread_num: "
<<
engine_conf
.
cpu_math_thread_num
()
<<
", trt_workspace_size: "
<<
engine_conf
.
trt_workspace_size
()
<<
", trt_use_static: "
<<
engine_conf
.
trt_use_static
();
VLOG
(
2
)
<<
"create paddle predictor sucess, path: "
<<
model_path
;
return
0
;
...
...
python/paddle_serving_server/serve.py
浏览文件 @
4de287c6
...
...
@@ -280,6 +280,27 @@ def serve_args():
default
=
""
,
nargs
=
"+"
,
help
=
"min_subgraph_size"
)
parser
.
add_argument
(
"--gpu_memory_mb"
,
type
=
int
,
default
=
50
,
help
=
"Initially allocate GPU storage size"
)
parser
.
add_argument
(
"--cpu_math_thread_num"
,
type
=
int
,
default
=
1
,
help
=
"Initialize the number of CPU computing threads"
)
parser
.
add_argument
(
"--trt_workspace_size"
,
type
=
int
,
default
=
33554432
,
help
=
"Initialize allocation 1 << 25 GPU storage size"
)
parser
.
add_argument
(
"--trt_use_static"
,
default
=
False
,
action
=
"store_true"
,
help
=
"Initialize TRT with static data"
)
return
parser
.
parse_args
()
...
...
@@ -396,10 +417,14 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi
server
.
set_dist_endpoints
(
args
.
dist_endpoints
.
split
(
","
))
server
.
set_dist_subgraph_index
(
args
.
dist_subgraph_index
)
server
.
set_min_subgraph_size
(
args
.
min_subgraph_size
)
server
.
set_gpu_memory_mb
(
args
.
gpu_memory_mb
)
server
.
set_cpu_math_thread_num
(
args
.
cpu_math_thread_num
)
if
args
.
use_trt
and
device
==
"gpu"
:
server
.
set_trt
()
server
.
set_ir_optimize
(
True
)
server
.
set_trt_workspace_size
(
args
.
trt_workspace_size
)
server
.
set_trt_use_static
(
args
.
trt_use_static
)
if
is_ocr
:
info
=
set_ocr_dynamic_shape_info
()
server
.
set_trt_dynamic_shape_info
(
info
)
...
...
python/paddle_serving_server/server.py
浏览文件 @
4de287c6
...
...
@@ -119,6 +119,10 @@ class Server(object):
self
.
dist_master_serving
=
False
self
.
min_subgraph_size
=
[]
self
.
trt_dynamic_shape_info
=
[]
self
.
gpu_memory_mb
=
50
self
.
cpu_math_thread_num
=
1
self
.
trt_workspace_size
=
33554432
# 1 << 25
self
.
trt_use_static
=
False
def
get_fetch_list
(
self
,
infer_node_idx
=-
1
):
fetch_names
=
[
...
...
@@ -289,6 +293,18 @@ class Server(object):
def
set_trt_dynamic_shape_info
(
self
,
info
):
self
.
trt_dynamic_shape_info
=
info
def
set_gpu_memory_mb
(
self
,
gpu_memory_mb
):
self
.
gpu_memory_mb
=
gpu_memory_mb
def
set_cpu_math_thread_num
(
self
,
cpu_math_thread_num
):
self
.
cpu_math_thread_num
=
cpu_math_thread_num
def
set_trt_workspace_size
(
self
,
trt_workspace_size
):
self
.
trt_workspace_size
=
trt_workspace_size
def
set_trt_use_static
(
self
,
trt_use_static
):
self
.
trt_use_static
=
trt_use_static
def
_prepare_engine
(
self
,
model_config_paths
,
device
,
use_encryption_model
):
self
.
device
=
device
if
self
.
model_toolkit_conf
==
None
:
...
...
@@ -342,6 +358,10 @@ class Server(object):
engine
.
use_xpu
=
self
.
use_xpu
engine
.
use_ascend_cl
=
self
.
use_ascend_cl
engine
.
use_gpu
=
False
engine
.
gpu_memory_mb
=
self
.
gpu_memory_mb
engine
.
cpu_math_thread_num
=
self
.
cpu_math_thread_num
engine
.
trt_workspace_size
=
self
.
trt_workspace_size
engine
.
trt_use_static
=
self
.
trt_use_static
# use distributed model.
if
self
.
dist_subgraph_index
>=
0
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录