Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Serving
提交
9afdca0a
S
Serving
项目概览
PaddlePaddle
/
Serving
1 年多 前同步成功
通知
186
Star
833
Fork
253
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
105
列表
看板
标记
里程碑
合并请求
10
Wiki
2
Wiki
分析
仓库
DevOps
项目成员
Pages
S
Serving
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
105
Issue
105
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
2
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9afdca0a
编写于
2月 18, 2020
作者:
G
guru4elephant
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add profiling timeline
上级
8dcee4e9
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
134 addition
and
22 deletion
+134
-22
core/CMakeLists.txt
core/CMakeLists.txt
+1
-1
core/general-client/CMakeLists.txt
core/general-client/CMakeLists.txt
+1
-1
core/general-client/include/general_model.h
core/general-client/include/general_model.h
+1
-0
core/general-client/src/general_model.cpp
core/general-client/src/general_model.cpp
+34
-8
core/general-server/op/general_infer_helper.h
core/general-server/op/general_infer_helper.h
+15
-2
core/general-server/op/general_infer_op.cpp
core/general-server/op/general_infer_op.cpp
+8
-4
core/general-server/op/general_reader_op.cpp
core/general-server/op/general_reader_op.cpp
+13
-0
core/general-server/op/general_response_op.cpp
core/general-server/op/general_response_op.cpp
+18
-1
core/general-server/op/general_text_reader_op.cpp
core/general-server/op/general_text_reader_op.cpp
+13
-0
core/general-server/op/general_text_response_op.cpp
core/general-server/op/general_text_response_op.cpp
+17
-0
core/general-server/proto/general_model_service.proto
core/general-server/proto/general_model_service.proto
+1
-1
core/predictor/framework/service.cpp
core/predictor/framework/service.cpp
+3
-0
core/sdk-cpp/proto/general_model_service.proto
core/sdk-cpp/proto/general_model_service.proto
+1
-1
core/util/CMakeLists.txt
core/util/CMakeLists.txt
+1
-0
core/util/include/timer.h
core/util/include/timer.h
+1
-0
core/util/src/CMakeLists.txt
core/util/src/CMakeLists.txt
+1
-0
core/util/src/timer.cc
core/util/src/timer.cc
+5
-0
python/paddle_serving_client/__init__.py
python/paddle_serving_client/__init__.py
+0
-3
未找到文件。
core/CMakeLists.txt
浏览文件 @
9afdca0a
...
...
@@ -26,5 +26,5 @@ endif()
if
(
NOT CLIENT_ONLY
)
add_subdirectory
(
predictor
)
add_subdirectory
(
general-server
)
add_subdirectory
(
util
)
endif
()
add_subdirectory
(
util
)
core/general-client/CMakeLists.txt
浏览文件 @
9afdca0a
if
(
CLIENT_ONLY
)
add_subdirectory
(
pybind11
)
pybind11_add_module
(
serving_client src/general_model.cpp src/pybind_general_model.cpp
)
target_link_libraries
(
serving_client PRIVATE -Wl,--whole-archive sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz
)
target_link_libraries
(
serving_client PRIVATE -Wl,--whole-archive
utils
sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz
)
endif
()
core/general-client/include/general_model.h
浏览文件 @
9afdca0a
...
...
@@ -87,6 +87,7 @@ class PredictorClient {
std
::
map
<
std
::
string
,
std
::
string
>
_fetch_name_to_var_name
;
std
::
vector
<
std
::
vector
<
int
>>
_shape
;
std
::
vector
<
int
>
_type
;
std
::
vector
<
int64_t
>
_last_request_ts
;
};
}
// namespace general_model
...
...
core/general-client/src/general_model.cpp
浏览文件 @
9afdca0a
...
...
@@ -17,7 +17,9 @@
#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/predictor_sdk.h"
#include "core/util/include/timer.h"
using
baidu
::
paddle_serving
::
Timer
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Request
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Response
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Tensor
;
...
...
@@ -117,15 +119,20 @@ std::vector<std::vector<float>> PredictorClient::predict(
return
fetch_result
;
}
Timer
timeline
;
int64_t
preprocess_start
=
timeline
.
TimeStampUS
();
// we save infer_us at fetch_result[fetch_name.size()]
fetch_result
.
resize
(
fetch_name
.
size
()
+
1
);
_api
.
thrd_clear
();
_predictor
=
_api
.
fetch_predictor
(
"general_model"
);
VLOG
(
2
)
<<
"fetch general model predictor done."
;
VLOG
(
2
)
<<
"float feed name size: "
<<
float_feed_name
.
size
();
VLOG
(
2
)
<<
"int feed name size: "
<<
int_feed_name
.
size
();
VLOG
(
2
)
<<
"fetch name size: "
<<
fetch_name
.
size
();
Request
req
;
for
(
auto
&
name
:
fetch_name
)
{
req
.
add_fetch_var_names
(
name
);
...
...
@@ -175,16 +182,21 @@ std::vector<std::vector<float>> PredictorClient::predict(
vec_idx
++
;
}
VLOG
(
2
)
<<
"feed int feed var done."
;
int64_t
preprocess_end
=
timeline
.
TimeStampUS
()
;
// std::map<std::string, std::vector<float> > result
;
int64_t
client_infer_start
=
timeline
.
TimeStampUS
()
;
Response
res
;
int64_t
client_infer_end
=
0
;
int64_t
postprocess_start
=
0
;
int64_t
postprocess_end
=
0
;
res
.
Clear
();
if
(
_predictor
->
inference
(
&
req
,
&
res
)
!=
0
)
{
LOG
(
ERROR
)
<<
"failed call predictor with req: "
<<
req
.
ShortDebugString
();
exit
(
-
1
);
}
else
{
client_infer_end
=
timeline
.
TimeStampUS
();
postprocess_start
=
client_infer_end
;
for
(
auto
&
name
:
fetch_name
)
{
int
idx
=
_fetch_name_to_idx
[
name
];
int
len
=
res
.
insts
(
0
).
tensor_array
(
idx
).
data_size
();
...
...
@@ -196,10 +208,24 @@ std::vector<std::vector<float>> PredictorClient::predict(
*
(
const
float
*
)
res
.
insts
(
0
).
tensor_array
(
idx
).
data
(
i
).
c_str
();
}
}
fetch_result
[
fetch_name
.
size
()].
resize
(
1
);
fetch_result
[
fetch_name
.
size
()][
0
]
=
res
.
mean_infer_us
();
postprocess_end
=
timeline
.
TimeStampUS
();
}
int
op_num
=
res
.
profile_time_size
()
/
2
;
VLOG
(
2
)
<<
"preprocess start: "
<<
preprocess_start
;
VLOG
(
2
)
<<
"preprocess end: "
<<
preprocess_end
;
VLOG
(
2
)
<<
"client infer start: "
<<
client_infer_start
;
VLOG
(
2
)
<<
"op1 start: "
<<
res
.
profile_time
(
0
);
VLOG
(
2
)
<<
"op1 end: "
<<
res
.
profile_time
(
1
);
VLOG
(
2
)
<<
"op2 start: "
<<
res
.
profile_time
(
2
);
VLOG
(
2
)
<<
"op2 end: "
<<
res
.
profile_time
(
3
);
VLOG
(
2
)
<<
"op3 start: "
<<
res
.
profile_time
(
4
);
VLOG
(
2
)
<<
"op3 end: "
<<
res
.
profile_time
(
5
);
VLOG
(
2
)
<<
"client infer end: "
<<
client_infer_end
;
VLOG
(
2
)
<<
"client postprocess start: "
<<
postprocess_start
;
VLOG
(
2
)
<<
"client postprocess end: "
<<
postprocess_end
;
return
fetch_result
;
}
...
...
@@ -308,10 +334,10 @@ std::vector<std::vector<std::vector<float>>> PredictorClient::batch_predict(
}
}
}
//last index for infer time
fetch_result_batch
[
batch_size
].
resize
(
1
);
fetch_result_batch
[
batch_size
][
0
].
resize
(
1
);
fetch_result_batch
[
batch_size
][
0
][
0
]
=
res
.
mean_infer_us
();
//
last index for infer time
//
fetch_result_batch[batch_size].resize(1);
//
fetch_result_batch[batch_size][0].resize(1);
//
fetch_result_batch[batch_size][0][0] = res.mean_infer_us();
}
return
fetch_result_batch
;
...
...
core/general-server/op/general_infer_helper.h
浏览文件 @
9afdca0a
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <string.h>
#include <vector>
#ifdef BCLOUD
#ifdef WITH_GPU
...
...
@@ -34,8 +35,8 @@ static const char* GENERAL_MODEL_NAME = "general_model";
struct
GeneralBlob
{
std
::
vector
<
paddle
::
PaddleTensor
>
tensor_vector
;
double
infer_time
;
std
::
vector
<
std
::
string
>
fetch_name_vector
;
int64_t
time_stamp
[
20
]
;
int
p_size
=
0
;
void
Clear
()
{
size_t
tensor_count
=
tensor_vector
.
size
();
...
...
@@ -60,6 +61,18 @@ struct GeneralBlob {
std
::
string
ShortDebugString
()
const
{
return
"Not implemented!"
;
}
};
static
void
AddBlobInfo
(
GeneralBlob
*
blob
,
int64_t
init_value
)
{
blob
->
time_stamp
[
blob
->
p_size
]
=
init_value
;
blob
->
p_size
++
;
}
static
void
CopyBlobInfo
(
const
GeneralBlob
*
src
,
GeneralBlob
*
tgt
)
{
memcpy
(
&
(
tgt
->
time_stamp
[
0
]),
&
(
src
->
time_stamp
[
0
]),
src
->
p_size
*
sizeof
(
int64_t
));
}
}
// namespace serving
}
// namespace paddle_serving
}
// namespace baidu
core/general-server/op/general_infer_op.cpp
浏览文件 @
9afdca0a
...
...
@@ -54,15 +54,19 @@ int GeneralInferOp::inference() {
VLOG
(
2
)
<<
"infer batch size: "
<<
batch_size
;
// infer
Timer
timeline
;
double
infer_time
=
0.0
;
// double infer_time = 0.0;
int64_t
start
=
timeline
.
TimeStampUS
();
timeline
.
Start
();
if
(
InferManager
::
instance
().
infer
(
GENERAL_MODEL_NAME
,
in
,
out
,
batch_size
))
{
LOG
(
ERROR
)
<<
"Failed do infer in fluid model: "
<<
GENERAL_MODEL_NAME
;
return
-
1
;
}
timeline
.
Pause
();
infer_time
=
timeline
.
ElapsedUS
();
// timeline.Pause();
// infer_time = timeline.ElapsedUS();
int64_t
end
=
timeline
.
TimeStampUS
();
CopyBlobInfo
(
input_blob
,
output_blob
);
AddBlobInfo
(
output_blob
,
start
);
AddBlobInfo
(
output_blob
,
end
);
return
0
;
}
DEFINE_OP
(
GeneralInferOp
);
...
...
core/general-server/op/general_reader_op.cpp
浏览文件 @
9afdca0a
...
...
@@ -20,11 +20,13 @@
#include "core/general-server/op/general_reader_op.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace
baidu
{
namespace
paddle_serving
{
namespace
serving
{
using
baidu
::
paddle_serving
::
Timer
;
using
baidu
::
paddle_serving
::
predictor
::
MempoolWrapper
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Tensor
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Request
;
...
...
@@ -85,6 +87,10 @@ int GeneralReaderOp::inference() {
LOG
(
ERROR
)
<<
"Failed get op tls reader object output"
;
}
Timer
timeline
;
// double read_time = 0.0;
// timeline.Start();
int64_t
start
=
timeline
.
TimeStampUS
();
int
var_num
=
req
->
insts
(
0
).
tensor_array_size
();
VLOG
(
2
)
<<
"var num: "
<<
var_num
;
// read config
...
...
@@ -196,6 +202,13 @@ int GeneralReaderOp::inference() {
}
}
timeline
.
Pause
();
// read_time = timeline.ElapsedUS();
int64_t
end
=
timeline
.
TimeStampUS
();
res
->
p_size
=
0
;
AddBlobInfo
(
res
,
start
);
AddBlobInfo
(
res
,
end
);
VLOG
(
2
)
<<
"read data from client success"
;
return
0
;
}
...
...
core/general-server/op/general_response_op.cpp
浏览文件 @
9afdca0a
...
...
@@ -53,6 +53,11 @@ int GeneralResponseOp::inference() {
const
Request
*
req
=
dynamic_cast
<
const
Request
*>
(
get_request_message
());
Timer
timeline
;
// double response_time = 0.0;
// timeline.Start();
int64_t
start
=
timeline
.
TimeStampUS
();
VLOG
(
2
)
<<
"start to call load general model_conf op"
;
baidu
::
paddle_serving
::
predictor
::
Resource
&
resource
=
baidu
::
paddle_serving
::
predictor
::
Resource
::
instance
();
...
...
@@ -67,7 +72,7 @@ int GeneralResponseOp::inference() {
fetch_index
[
i
]
=
model_config
->
_fetch_alias_name_to_index
[
req
->
fetch_var_names
(
i
)];
}
// response inst with only fetch_var_names
Response
*
res
=
mutable_data
<
Response
>
();
...
...
@@ -118,6 +123,18 @@ int GeneralResponseOp::inference() {
}
var_idx
++
;
}
// timeline.Pause();
// response_time = timeline.ElapsedUS();
int64_t
end
=
timeline
.
TimeStampUS
();
VLOG
(
2
)
<<
"p size for input blob: "
<<
input_blob
->
p_size
;
for
(
int
i
=
0
;
i
<
input_blob
->
p_size
;
++
i
)
{
res
->
add_profile_time
(
input_blob
->
time_stamp
[
i
]);
}
// TODO(guru4elephant): find more elegant way to do this
res
->
add_profile_time
(
start
);
res
->
add_profile_time
(
end
);
return
0
;
}
...
...
core/general-server/op/general_text_reader_op.cpp
浏览文件 @
9afdca0a
...
...
@@ -19,11 +19,13 @@
#include "core/general-server/op/general_text_reader_op.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace
baidu
{
namespace
paddle_serving
{
namespace
serving
{
using
baidu
::
paddle_serving
::
Timer
;
using
baidu
::
paddle_serving
::
predictor
::
MempoolWrapper
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Tensor
;
using
baidu
::
paddle_serving
::
predictor
::
general_model
::
Request
;
...
...
@@ -54,6 +56,11 @@ int GeneralTextReaderOp::inference() {
return
-
1
;
}
Timer
timeline
;
// double read_time = 0.0;
// timeline.Start();
int64_t
start
=
timeline
.
TimeStampUS
();
int
var_num
=
req
->
insts
(
0
).
tensor_array_size
();
VLOG
(
2
)
<<
"var num: "
<<
var_num
;
// read config
...
...
@@ -157,6 +164,12 @@ int GeneralTextReaderOp::inference() {
}
}
// timeline.Pause();
// read_time = timeline.ElapsedUS();
int64_t
end
=
timeline
.
TimeStampUS
();
AddBlobInfo
(
res
,
start
);
AddBlobInfo
(
res
,
end
);
VLOG
(
2
)
<<
"read data from client success"
;
return
0
;
}
...
...
core/general-server/op/general_text_response_op.cpp
浏览文件 @
9afdca0a
...
...
@@ -53,6 +53,11 @@ int GeneralTextResponseOp::inference() {
const
Request
*
req
=
dynamic_cast
<
const
Request
*>
(
get_request_message
());
Timer
timeline
;
// double response_time = 0.0;
// timeline.Start();
int64_t
start
=
timeline
.
TimeStampUS
();
VLOG
(
2
)
<<
"start to call load general model_conf op"
;
baidu
::
paddle_serving
::
predictor
::
Resource
&
resource
=
baidu
::
paddle_serving
::
predictor
::
Resource
::
instance
();
...
...
@@ -118,6 +123,18 @@ int GeneralTextResponseOp::inference() {
}
var_idx
++
;
}
// timeline.Pause();
// response_time = timeline.ElapsedUS();
int64_t
end
=
timeline
.
TimeStampUS
();
for
(
int
i
=
0
;
i
<
input_blob
->
p_size
;
++
i
)
{
res
->
add_profile_time
(
input_blob
->
time_stamp
[
i
]);
}
// TODO(guru4elephant): find more elegant way to do this
res
->
add_profile_time
(
start
);
res
->
add_profile_time
(
end
);
return
0
;
}
DEFINE_OP
(
GeneralTextResponseOp
);
...
...
core/general-server/proto/general_model_service.proto
浏览文件 @
9afdca0a
...
...
@@ -42,7 +42,7 @@ message Request {
message
Response
{
repeated
FetchInst
insts
=
1
;
optional
float
mean_infer_us
=
2
;
repeated
int64
profile_time
=
2
;
};
service
GeneralModelService
{
...
...
core/predictor/framework/service.cpp
浏览文件 @
9afdca0a
...
...
@@ -147,6 +147,7 @@ int InferService::inference(const google::protobuf::Message* request,
TRACEPRINTF
(
"finish to thread clear"
);
if
(
_enable_map_request_to_workflow
)
{
LOG
(
INFO
)
<<
"enable map request == True"
;
std
::
vector
<
Workflow
*>*
workflows
=
_map_request_to_workflow
(
request
);
if
(
!
workflows
||
workflows
->
size
()
==
0
)
{
LOG
(
ERROR
)
<<
"Failed to map request to workflow"
;
...
...
@@ -169,6 +170,7 @@ int InferService::inference(const google::protobuf::Message* request,
}
}
}
else
{
LOG
(
INFO
)
<<
"enable map request == False"
;
TRACEPRINTF
(
"start to execute one workflow"
);
size_t
fsize
=
_flows
.
size
();
for
(
size_t
fi
=
0
;
fi
<
fsize
;
++
fi
)
{
...
...
@@ -233,6 +235,7 @@ int InferService::_execute_workflow(Workflow* workflow,
TRACEPRINTF
(
"finish to copy from"
);
workflow_time
.
stop
();
LOG
(
INFO
)
<<
"workflow total time: "
<<
workflow_time
.
u_elapsed
();
PredictorMetric
::
GetInstance
()
->
update_latency_metric
(
WORKFLOW_METRIC_PREFIX
+
dv
->
full_name
(),
workflow_time
.
u_elapsed
());
...
...
core/sdk-cpp/proto/general_model_service.proto
浏览文件 @
9afdca0a
...
...
@@ -42,7 +42,7 @@ message Request {
message
Response
{
repeated
FetchInst
insts
=
1
;
optional
float
mean_infer_us
=
2
;
repeated
int64
profile_time
=
2
;
};
service
GeneralModelService
{
...
...
core/util/CMakeLists.txt
浏览文件 @
9afdca0a
include
(
src/CMakeLists.txt
)
add_library
(
utils
${
util_srcs
}
)
core/util/include/timer.h
浏览文件 @
9afdca0a
...
...
@@ -38,6 +38,7 @@ class Timer {
double
ElapsedMS
();
// return elapsed time in sec
double
ElapsedSec
();
int64_t
TimeStampUS
();
private:
struct
timeval
_start
;
...
...
core/util/src/CMakeLists.txt
浏览文件 @
9afdca0a
FILE
(
GLOB srcs
${
CMAKE_CURRENT_LIST_DIR
}
/*.cc
)
LIST
(
APPEND util_srcs
${
srcs
}
)
core/util/src/timer.cc
浏览文件 @
9afdca0a
...
...
@@ -54,6 +54,11 @@ double Timer::ElapsedMS() { return _elapsed / 1000.0; }
double
Timer
::
ElapsedSec
()
{
return
_elapsed
/
1000000.0
;
}
int64_t
Timer
::
TimeStampUS
()
{
gettimeofday
(
&
_now
,
NULL
);
return
_now
.
tv_usec
;
}
int64_t
Timer
::
Tickus
()
{
gettimeofday
(
&
_now
,
NULL
);
return
(
_now
.
tv_sec
-
_start
.
tv_sec
)
*
1000
*
1000L
+
...
...
python/paddle_serving_client/__init__.py
浏览文件 @
9afdca0a
...
...
@@ -143,9 +143,6 @@ class Client(object):
for
i
,
name
in
enumerate
(
fetch_names
):
result_map
[
name
]
=
result
[
i
]
if
profile
:
result_map
[
"infer_time"
]
=
result
[
-
1
][
0
]
return
result_map
def
batch_predict
(
self
,
feed_batch
=
[],
fetch
=
[],
profile
=
False
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录