PaddlePaddle / Serving

Commit f0e5a0bd
update code

Authored on Apr 13, 2020 by barrierye
Parent: 28368f7d
Showing 17 changed files with 272 additions and 200 deletions (+272 −200)
core/general-client/include/general_model.h              +25  −7
core/general-client/src/general_model.cpp                +66  −55
core/general-client/src/pybind_general_model.cpp         +7   −5
core/general-server/op/general_infer_op.cpp              +20  −19
core/general-server/op/general_reader_op.cpp             +1   −0
core/general-server/op/general_response_op.cpp           +94  −83
core/general-server/op/general_text_response_op.cpp      +6   −2
core/general-server/proto/general_model_service.proto    +6   −1
core/predictor/framework/dag_view.cpp                    +9   −8
core/predictor/framework/server.cpp                      +2   −0
core/predictor/framework/service.cpp                     +1   −0
core/predictor/framework/workflow.cpp                    +3   −1
core/predictor/op/op.h                                   +18  −15
core/predictor/src/pdserving.cpp                         +2   −0
core/sdk-cpp/proto/general_model_service.proto           +6   −1
ensemble-demo/client.py                                  +4   −2
ensemble-demo/server.py                                  +2   −1
core/general-client/include/general_model.h

@@ -39,11 +39,10 @@ namespace baidu {
 namespace paddle_serving {
 namespace general_model {
 
-class PredictorRes {
+class ModelRes {
  public:
-  PredictorRes() {}
-  ~PredictorRes() {}
+  ModelRes() {}
+  ~ModelRes() {}
 
  public:
   const std::vector<std::vector<int64_t>>& get_int64_by_name(
       const std::string& name) {

@@ -53,14 +52,33 @@ class PredictorRes {
       const std::string& name) {
     return _float_map[name];
   }
+
+ public:
+  std::map<std::string, std::vector<std::vector<int64_t>>> _int64_map;
+  std::map<std::string, std::vector<std::vector<float>>> _float_map;
+};
+
+class PredictorRes {
+ public:
+  PredictorRes() {}
+  ~PredictorRes() {}
+
+ public:
+  void clear() { _models.clear(); }
+  const std::vector<std::vector<int64_t>>& get_int64_by_name(
+      const int model_idx, const std::string& name) {
+    return _models[model_idx].get_int64_by_name(name);
+  }
+  const std::vector<std::vector<float>>& get_float_by_name(
+      const int model_idx, const std::string& name) {
+    return _models[model_idx].get_float_by_name(name);
+  }
   void set_variant_tag(const std::string& variant_tag) {
     _variant_tag = variant_tag;
   }
   const std::string& variant_tag() { return _variant_tag; }
+  int models_num() { return _models.size(); }
 
  public:
-  std::map<std::string, std::vector<std::vector<int64_t>>> _int64_map;
-  std::map<std::string, std::vector<std::vector<float>>> _float_map;
+  std::vector<ModelRes> _models;
 
  private:
   std::string _variant_tag;
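Net effect of this header: results are now two-level. ModelRes keeps the fetch-name to batch-of-values maps for a single model, and PredictorRes aggregates one ModelRes per model in the ensemble, which is why the client code below switches from predict_res._int64_map[name] to predict_res._models[m_idx]._int64_map[name]. A minimal standalone sketch of the pattern (toy code, float path only; not the Serving sources):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// One model's fetched outputs: fetch name -> [batch][values].
struct ModelRes {
  std::map<std::string, std::vector<std::vector<float>>> _float_map;
  const std::vector<std::vector<float>>& get_float_by_name(
      const std::string& name) {
    return _float_map[name];
  }
};

// Aggregate over all models of the ensemble, addressed by model index.
struct PredictorRes {
  std::vector<ModelRes> _models;
  const std::vector<std::vector<float>>& get_float_by_name(
      const int model_idx, const std::string& name) {
    return _models[model_idx].get_float_by_name(name);  // delegate per model
  }
  int models_num() { return static_cast<int>(_models.size()); }
};

int main() {
  PredictorRes res;
  res._models.resize(2);  // e.g. two ensemble members
  res._models[0]._float_map["prediction"] = {{0.1f, 0.9f}};
  res._models[1]._float_map["prediction"] = {{0.4f, 0.6f}};
  for (int m = 0; m < res.models_num(); ++m) {
    std::printf("model %d: %f\n", m,
                res.get_float_by_name(m, "prediction")[0][1]);
  }
  return 0;
}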
core/general-client/src/general_model.cpp

@@ -139,8 +139,7 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
     const std::vector<std::string> &fetch_name,
     PredictorRes &predict_res,
     const int &pid) {  // NOLINT
-  predict_res._int64_map.clear();
-  predict_res._float_map.clear();
+  predict_res.clear();
   Timer timeline;
   int64_t preprocess_start = timeline.TimeStampUS();
   _api.thrd_clear();

@@ -215,30 +214,37 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
     VLOG(2) << "predict done.";
     client_infer_end = timeline.TimeStampUS();
     postprocess_start = client_infer_end;
+    // severaal model output
+    uint32_t model_num = res.outputs_size();
+    predict_res._models.resize(model_num);
+    for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
+      VLOG(2) << "process model output index: " << m_idx;
+      auto output = res.outputs(m_idx);
     for (auto &name : fetch_name) {
       int idx = _fetch_name_to_idx[name];
      VLOG(2) << "fetch name: " << name;
      if (_fetch_name_to_type[name] == 0) {
-        int len = res.insts(0).tensor_array(idx).int64_data_size();
+        int len = output.insts(0).tensor_array(idx).int64_data_size();
        VLOG(2) << "fetch tensor : " << name << " type: int64 len : " << len;
-        predict_res._int64_map[name].resize(1);
-        predict_res._int64_map[name][0].resize(len);
+        predict_res._models[m_idx]._int64_map[name].resize(1);
+        predict_res._models[m_idx]._int64_map[name][0].resize(len);
        for (int i = 0; i < len; ++i) {
-          predict_res._int64_map[name][0][i] =
-              res.insts(0).tensor_array(idx).int64_data(i);
+          predict_res._models[m_idx]._int64_map[name][0][i] =
+              output.insts(0).tensor_array(idx).int64_data(i);
        }
      } else if (_fetch_name_to_type[name] == 1) {
-        int len = res.insts(0).tensor_array(idx).float_data_size();
+        int len = output.insts(0).tensor_array(idx).float_data_size();
        VLOG(2) << "fetch tensor : " << name << " type: float32 len : " << len;
-        predict_res._float_map[name].resize(1);
-        predict_res._float_map[name][0].resize(len);
+        predict_res._models[m_idx]._float_map[name].resize(1);
+        predict_res._models[m_idx]._float_map[name][0].resize(len);
        for (int i = 0; i < len; ++i) {
-          predict_res._float_map[name][0][i] =
-              res.insts(0).tensor_array(idx).float_data(i);
+          predict_res._models[m_idx]._float_map[name][0][i] =
+              output.insts(0).tensor_array(idx).float_data(i);
        }
      }
    }
-  postprocess_end = timeline.TimeStampUS();
+      postprocess_end = timeline.TimeStampUS();
+    }
 
  if (FLAGS_profile_client) {

@@ -249,7 +255,7 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
         << "prepro_1:" << preprocess_end << " "
         << "client_infer_0:" << client_infer_start << " "
         << "client_infer_1:" << client_infer_end << " ";
-
+    // TODO: multi-model
    if (FLAGS_profile_server) {
      int op_num = res.profile_time_size() / 2;
      for (int i = 0; i < op_num; ++i) {

@@ -276,8 +282,7 @@ int PredictorClient::batch_predict(
     const int &pid) {
   int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
-  predict_res_batch._int64_map.clear();
-  predict_res_batch._float_map.clear();
+  predict_res_batch.clear();
   Timer timeline;
   int64_t preprocess_start = timeline.TimeStampUS();

@@ -294,7 +299,7 @@ int PredictorClient::batch_predict(
   for (auto &name : fetch_name) {
     req.add_fetch_var_names(name);
   }
-
+  //
   for (int bi = 0; bi < batch_size; bi++) {
     VLOG(2) << "prepare batch " << bi;
     std::vector<Tensor *> tensor_vec;

@@ -371,34 +376,40 @@ int PredictorClient::batch_predict(
   } else {
     client_infer_end = timeline.TimeStampUS();
     postprocess_start = client_infer_end;
+    uint32_t model_num = res.outputs_size();
+    predict_res_batch._models.resize(model_num);
+    for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
+      VLOG(2) << "process model output index: " << m_idx;
+      auto output = res.outputs(m_idx);
     for (auto &name : fetch_name) {
-      predict_res_batch._int64_map[name].resize(batch_size);
-      predict_res_batch._float_map[name].resize(batch_size);
+      predict_res_batch._models[m_idx]._int64_map[name].resize(batch_size);
+      predict_res_batch._models[m_idx]._float_map[name].resize(batch_size);
     }
     for (int bi = 0; bi < batch_size; bi++) {
       for (auto &name : fetch_name) {
         int idx = _fetch_name_to_idx[name];
-        int len = res.insts(bi).tensor_array(idx).data_size();
+        int len = output.insts(bi).tensor_array(idx).data_size();
         if (_fetch_name_to_type[name] == 0) {
-          int len = res.insts(bi).tensor_array(idx).int64_data_size();
+          int len = output.insts(bi).tensor_array(idx).int64_data_size();
           VLOG(2) << "fetch tensor : " << name << " type: int64 len : " << len;
-          predict_res_batch._int64_map[name][bi].resize(len);
+          predict_res_batch._models[m_idx]._int64_map[name][bi].resize(len);
           VLOG(2) << "fetch name " << name << " index " << idx
                   << " first data "
-                  << res.insts(bi).tensor_array(idx).int64_data(0);
+                  << output.insts(bi).tensor_array(idx).int64_data(0);
           for (int i = 0; i < len; ++i) {
-            predict_res_batch._int64_map[name][bi][i] =
-                res.insts(bi).tensor_array(idx).int64_data(i);
+            predict_res_batch._models[m_idx]._int64_map[name][bi][i] =
+                output.insts(bi).tensor_array(idx).int64_data(i);
           }
         } else if (_fetch_name_to_type[name] == 1) {
-          int len = res.insts(bi).tensor_array(idx).float_data_size();
+          int len = output.insts(bi).tensor_array(idx).float_data_size();
           VLOG(2) << "fetch tensor : " << name
                   << " type: float32 len : " << len;
-          predict_res_batch._float_map[name][bi].resize(len);
+          predict_res_batch._models[m_idx]._float_map[name][bi].resize(len);
           VLOG(2) << "fetch name " << name << " index " << idx
                   << " first data "
-                  << res.insts(bi).tensor_array(idx).float_data(0);
+                  << output.insts(bi).tensor_array(idx).float_data(0);
           for (int i = 0; i < len; ++i) {
-            predict_res_batch._float_map[name][bi][i] =
-                res.insts(bi).tensor_array(idx).float_data(i);
+            predict_res_batch._models[m_idx]._float_map[name][bi][i] =
+                output.insts(bi).tensor_array(idx).float_data(i);
           }
         }
       }
     }
+  }

@@ -414,7 +425,7 @@ int PredictorClient::batch_predict(
         << "prepro_1:" << preprocess_end << " "
         << "client_infer_0:" << client_infer_start << " "
         << "client_infer_1:" << client_infer_end << " ";
-
+    // TODO: multi-models
    if (FLAGS_profile_server) {
      int op_num = res.profile_time_size() / 2;
      for (int i = 0; i < op_num; ++i) {
core/general-client/src/pybind_general_model.cpp

@@ -31,17 +31,19 @@ PYBIND11_MODULE(serving_client, m) {
   py::class_<PredictorRes>(m, "PredictorRes", py::buffer_protocol())
       .def(py::init())
       .def("get_int64_by_name",
-           [](PredictorRes &self, std::string &name) {
-             return self.get_int64_by_name(name);
+           [](PredictorRes &self, int model_idx, std::string &name) {
+             return self.get_int64_by_name(model_idx, name);
            },
            py::return_value_policy::reference)
       .def("get_float_by_name",
-           [](PredictorRes &self, std::string &name) {
-             return self.get_float_by_name(name);
+           [](PredictorRes &self, int model_idx, std::string &name) {
+             return self.get_float_by_name(model_idx, name);
            },
            py::return_value_policy::reference)
-      .def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); });
+      .def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); })
+      .def("models_num", [](PredictorRes &self) { return self.models_num(); });
 
   py::class_<PredictorClient>(m, "PredictorClient", py::buffer_protocol())
       .def(py::init())
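The binding change is mechanical: each accessor gains a leading model_idx, and models_num() is exposed so callers can iterate over models. A compilable pybind11 sketch of the same binding shape (hypothetical DemoRes type and demo_res module name, not the real serving_client build):

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <string>
#include <vector>

namespace py = pybind11;

// Stand-in for PredictorRes: one value list per model index.
struct DemoRes {
  std::vector<std::vector<float>> vals{{0.9f}, {0.6f}};
  const std::vector<float>& get_float_by_name(int model_idx,
                                              const std::string& name) {
    (void)name;  // toy: ignore the fetch name
    return vals[model_idx];
  }
  int models_num() { return static_cast<int>(vals.size()); }
};

PYBIND11_MODULE(demo_res, m) {
  py::class_<DemoRes>(m, "DemoRes")
      .def(py::init())
      .def("get_float_by_name",
           [](DemoRes &self, int model_idx, std::string &name) {
             return self.get_float_by_name(model_idx, name);
           },
           py::return_value_policy::reference)
      .def("models_num", [](DemoRes &self) { return self.models_num(); });
}

From Python this reads res.get_float_by_name(0, "prediction") and res.models_num(), mirroring the new PredictorRes interface.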
core/general-server/op/general_infer_op.cpp

@@ -37,40 +37,41 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 int GeneralInferOp::inference() {
   VLOG(2) << "Going to run inference";
-  //const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name());
+  // const GeneralBlob *input_blob =
+  // get_depend_argument<GeneralBlob>(pre_name());
   VLOG(2) << "try to get output_blob";
   GeneralBlob *output_blob = mutable_data<GeneralBlob>();
   VLOG(2) << "finish get output_blob";
   fprintf(stderr, "[output] blob address %x\n", output_blob);
   TensorVector *out = &output_blob->tensor_vector;
   VLOG(2) << "finish get *out";
 
   const std::vector<std::string> pre_node_names = pre_names();
   VLOG(2) << "pre node names size: " << pre_node_names.size();
 
   TensorVector input;
   int batch_size = 0;
   const GeneralBlob *input_blob;
-  for (int i = 0; i < (int)pre_node_names.size(); ++i) {
-    VLOG(2) << "pre names[" << i << "]: " << pre_node_names[i];
-    input_blob = get_depend_argument<GeneralBlob>(pre_node_names[i]);
-    fprintf(stderr, "input blob address %x\n", input_blob);
+  for (uint32_t i = 0; i < pre_node_names.size(); ++i) {
+    VLOG(2) << "pre names[" << i << "]: " << pre_node_names[i];
+    input_blob = get_depend_argument<GeneralBlob>(pre_node_names[i]);
     if (!input_blob) {
-      LOG(ERROR) << "Failed mutable depended argument, op:" << pre_node_names[i];
+      LOG(ERROR) << "Failed mutable depended argument, op:"
+                 << pre_node_names[i];
       return -1;
     }
+    fprintf(stderr, "[input] blob address %x\n", input_blob);
 
     batch_size = input_blob->GetBatchSize();
     VLOG(2) << "batch size of input: " << batch_size;
-    for (int j = 0; j < input_blob->tensor_vector.size(); ++j) {
-      VLOG(2) << "input tensor[" << j << "]: " << input_blob->tensor_vector[j].name;
+    for (uint32_t j = 0; j < input_blob->tensor_vector.size(); ++j) {
+      VLOG(2) << "input tensor[" << j
+              << "]: " << input_blob->tensor_vector[j].name;
       input.push_back(input_blob->tensor_vector[j]);
-      VLOG(2) << "add an input tensor name: " << input_blob->tensor_vector[j].name;
+      VLOG(2) << "add an input tensor name: "
+              << input_blob->tensor_vector[j].name;
     }
   }
 
   VLOG(2) << "get output blob done.";
   const TensorVector *in = &input;
   VLOG(2) << "get input done.";
 
   batch_size = 1;
   VLOG(2) << "infer batch size: " << batch_size;

@@ -81,7 +82,7 @@ int GeneralInferOp::inference() {
   timeline.Start();
 
   VLOG(2) << "input of op " << op_name();
-  for (int i = 0; i < in->size(); ++i) {
+  for (uint32_t i = 0; i < in->size(); ++i) {
     VLOG(2) << in->at(i).name;
   }

@@ -94,7 +95,7 @@ int GeneralInferOp::inference() {
   VLOG(2) << "output of op " << op_name();
-  for (int i = 0; i < out->size(); ++i) {
+  for (uint32_t i = 0; i < out->size(); ++i) {
     VLOG(2) << out->at(i).name;
   }
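The behavioral change in this op is the fan-in: rather than reading a single predecessor via pre_name(), it now walks every node in pre_names() and concatenates their tensors into one input TensorVector before inference. A standalone sketch of that merge with simplified stand-in types (illustration only):

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct Tensor {
  std::string name;
  std::vector<float> data;
};
using TensorVector = std::vector<Tensor>;

// Concatenate the tensor lists of several predecessor ops into a single
// feed, the way GeneralInferOp::inference() now gathers its input.
TensorVector merge_inputs(const std::vector<TensorVector>& pre_outputs) {
  TensorVector input;
  for (uint32_t i = 0; i < pre_outputs.size(); ++i) {
    for (uint32_t j = 0; j < pre_outputs[i].size(); ++j) {
      input.push_back(pre_outputs[i][j]);  // keep per-node tensor order
    }
  }
  return input;
}

int main() {
  std::vector<TensorVector> pres = {{{"g1_out", {1.0f}}},
                                    {{"g2_out", {2.0f}}}};
  TensorVector input = merge_inputs(pres);
  for (const Tensor& t : input) {
    std::printf("%s\n", t.name.c_str());
  }
  return 0;
}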
core/general-server/op/general_reader_op.cpp

@@ -80,6 +80,7 @@ int GeneralReaderOp::inference() {
   std::vector<int64_t> capacity;
 
   GeneralBlob *res = mutable_data<GeneralBlob>();
+  fprintf(stderr, "[reader] out blob address %x\n", res);
   TensorVector *out = &res->tensor_vector;
 
   res->SetBatchSize(batch_size);
core/general-server/op/general_response_op.cpp

@@ -33,6 +33,7 @@ using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::FetchInst;
+using baidu::paddle_serving::predictor::general_model::ModelOutput;
 using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;

@@ -40,22 +41,12 @@ int GeneralResponseOp::inference() {
   const std::vector<std::string> pre_node_names = pre_names();
   VLOG(2) << "pre node names size: " << pre_node_names.size();
-  const GeneralBlob *input_blob =
-      get_depend_argument<GeneralBlob>(pre_node_names[0]);
-  if (!input_blob) {
-    LOG(ERROR) << "Failed mutable depended argument, op: " << pre_node_names[0];
-    return -1;
-  }
-
-  const TensorVector *in = &input_blob->tensor_vector;
-  int batch_size = input_blob->GetBatchSize();
-
-  VLOG(2) << "input batch size: " << batch_size;
 
   const Request *req = dynamic_cast<const Request *>(get_request_message());
-  // response inst with only fetch_var_names
-  Response *res = mutable_data<Response>();
 
   Timer timeline;
-  // double response_time = 0.0;
+  // double resionse_time = 0.0;
   // timeline.Start();
   int64_t start = timeline.TimeStampUS();

@@ -74,11 +65,26 @@ int GeneralResponseOp::inference() {
         model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
   }
 
+  // response inst with only fetch_var_names
+  Response *res = mutable_data<Response>();
+
+  const GeneralBlob *input_blob;
+  for (uint32_t i = 0; i < pre_node_names.size(); ++i) {
+    VLOG(2) << "pre names[" << i << "]: " << pre_node_names[i] << " ("
+            << pre_node_names.size() << ")";
+    input_blob = get_depend_argument<GeneralBlob>(pre_node_names[i]);
+    fprintf(stderr,
+            "input(%s) blob address %x\n",
+            pre_node_names[i].c_str(),
+            input_blob);
+    if (!input_blob) {
+      LOG(ERROR) << "Failed mutable depended argument, op: "
+                 << pre_node_names[0];
+      return -1;
+    }
+
+    const TensorVector *in = &input_blob->tensor_vector;
+    int batch_size = input_blob->GetBatchSize();
+    VLOG(2) << "input batch size: " << batch_size;
+
+    // TODO
+    ModelOutput *output = res->add_outputs();
     for (int i = 0; i < batch_size; ++i) {
-      FetchInst *fetch_inst = res->add_insts();
+      FetchInst *fetch_inst = output->add_insts();
       for (auto &idx : fetch_index) {
         Tensor *tensor = fetch_inst->add_tensor_array();
         // currently only response float tensor or lod_tensor

@@ -108,7 +114,7 @@ int GeneralResponseOp::inference() {
           for (int j = 0; j < batch_size; ++j) {
             for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
                  k++) {
-              FetchInst *fetch_p = res->mutable_insts(j);
+              FetchInst *fetch_p = output->mutable_insts(j);
               fetch_p->mutable_tensor_array(var_idx)->add_int64_data(
                   data_ptr[k]);
             }
           }

@@ -117,14 +123,14 @@ int GeneralResponseOp::inference() {
           if (var_size == batch_size) {
             for (int j = 0; j < batch_size; ++j) {
               for (int k = j * cap; k < (j + 1) * cap; ++k) {
-                FetchInst *fetch_p = res->mutable_insts(j);
+                FetchInst *fetch_p = output->mutable_insts(j);
                 fetch_p->mutable_tensor_array(var_idx)->add_int64_data(
                     data_ptr[k]);
               }
             }
           } else {
             for (int j = 0; j < batch_size; ++j) {
-              FetchInst *fetch_p = res->mutable_insts(j);
+              FetchInst *fetch_p = output->mutable_insts(j);
               fetch_p->mutable_tensor_array(var_idx)->add_int64_data(
                   data_ptr[0]);
             }
           }

@@ -136,7 +142,7 @@ int GeneralResponseOp::inference() {
          for (int j = 0; j < batch_size; ++j) {
            for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
                 k++) {
-              FetchInst *fetch_p = res->mutable_insts(j);
+              FetchInst *fetch_p = output->mutable_insts(j);
              fetch_p->mutable_tensor_array(var_idx)->add_float_data(
                  data_ptr[k]);
            }
          }

@@ -145,14 +151,14 @@ int GeneralResponseOp::inference() {
          if (var_size == batch_size) {
            for (int j = 0; j < batch_size; ++j) {
              for (int k = j * cap; k < (j + 1) * cap; ++k) {
-                FetchInst *fetch_p = res->mutable_insts(j);
+                FetchInst *fetch_p = output->mutable_insts(j);
                fetch_p->mutable_tensor_array(var_idx)->add_float_data(
                    data_ptr[k]);
              }
            }
          } else {
            for (int j = 0; j < batch_size; ++j) {
-              FetchInst *fetch_p = res->mutable_insts(j);
+              FetchInst *fetch_p = output->mutable_insts(j);
              fetch_p->mutable_tensor_array(var_idx)->add_float_data(
                  data_ptr[0]);
            }
          }

@@ -160,12 +166,17 @@ int GeneralResponseOp::inference() {
       var_idx++;
     }
   }
+  }
 
   if (req->profile_server()) {
     int64_t end = timeline.TimeStampUS();
+    for (uint32_t i = 0; i < pre_node_names.size(); ++i) {
+      input_blob = get_depend_argument<GeneralBlob>(pre_node_names[i]);
       VLOG(2) << "p size for input blob: " << input_blob->p_size;
+      ModelOutput *output = res->mutable_outputs(i);
       for (int i = 0; i < input_blob->p_size; ++i) {
-        res->add_profile_time(input_blob->time_stamp[i]);
+        output->add_profile_time(input_blob->time_stamp[i]);
       }
+    }
     // TODO(guru4elephant): find more elegant way to do this
     res->add_profile_time(start);
core/general-server/op/general_text_response_op.cpp

@@ -39,13 +39,17 @@ int GeneralTextResponseOp::inference() {
   const std::vector<std::string> pre_node_names = pre_names();
   VLOG(2) << "pre node names size: " << pre_node_names.size();
-  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_node_names[0]);
+  const GeneralBlob *input_blob =
+      get_depend_argument<GeneralBlob>(pre_node_names[0]);
   if (!input_blob) {
     LOG(ERROR) << "Failed mutable depended argument, op: " << pre_node_names[0];
     return -1;
   }
+  LOG(ERROR) << "Error!";
+  return -1;
+  /*
   const TensorVector *in = &input_blob->tensor_vector;
   int batch_size = input_blob->GetBatchSize();

@@ -127,7 +131,7 @@ int GeneralTextResponseOp::inference() {
     // TODO(guru4elephant): find more elegant way to do this
     res->add_profile_time(start);
     res->add_profile_time(end);
   }
 }
+  */
   return 0;
 }
core/general-server/proto/general_model_service.proto

@@ -39,10 +39,15 @@ message Request {
 };
 
 message Response {
-  repeated FetchInst insts = 1;
+  repeated ModelOutput outputs = 1;
   repeated int64 profile_time = 2;
 };
 
+message ModelOutput {
+  repeated FetchInst insts = 1;
+  repeated int64 profile_time = 2;
+}
+
 service GeneralModelService {
   rpc inference(Request) returns (Response);
   rpc debug(Request) returns (Response);
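This is the wire-level counterpart of the client restructuring: a Response no longer carries FetchInst entries directly but one ModelOutput per model, each with its own insts and profile_time; the protobuf-generated accessors are the outputs_size()/outputs(m) calls already visible in general_model.cpp above. A runnable sketch of the new nesting, using mock structs in place of the generated classes:

#include <cstdio>
#include <vector>

// Mock stand-ins for the protoc-generated messages, showing only the
// new nesting: Response -> repeated ModelOutput -> repeated FetchInst.
struct FetchInst {
  std::vector<std::vector<long long>> tensor_array;
};
struct ModelOutput {
  std::vector<FetchInst> insts;
};
struct Response {
  std::vector<ModelOutput> outputs;
};

int main() {
  Response res;
  res.outputs.resize(2);  // two models in the ensemble
  res.outputs[0].insts.push_back({{{1, 2, 3}}});
  res.outputs[1].insts.push_back({{{4, 5}}});
  for (size_t m = 0; m < res.outputs.size(); ++m) {
    for (size_t b = 0; b < res.outputs[m].insts.size(); ++b) {
      std::printf("model %zu inst %zu tensors %zu\n",
                  m, b, res.outputs[m].insts[b].tensor_array.size());
    }
  }
  return 0;
}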
core/predictor/framework/dag_view.cpp

@@ -92,7 +92,8 @@ int DagView::init(Dag* dag, const std::string& service_name) {
       vnode->op = op;
       // Add depends
       for (auto it = vnode->conf->depends.begin();
-           it != vnode->conf->depends.end(); ++it) {
+           it != vnode->conf->depends.end();
+           ++it) {
         std::string pre_node_name = it->first;
         VLOG(2) << "add op pre name: \n"
                 << "current op name: " << vnode->op->op_name()

@@ -103,11 +104,11 @@ int DagView::init(Dag* dag, const std::string& service_name) {
     }
     // TODO(guru4elephant): this seems buggy, please review later
     /*if (si > 0) {*/
-    // VLOG(2) << "set op pre name: \n"
+    //    VLOG(2) << "set op pre name: \n"
     //<< "current op name: " << vstage->nodes.back()->op->op_name()
     //<< " previous op name: "
     //<< _view[si - 1]->nodes.back()->op->op_name();
-    // vstage->nodes.back()->op->set_pre_node_name(
+    //    vstage->nodes.back()->op->set_pre_node_name(
     //_view[si - 1]->nodes.back()->op->op_name());
     /*}*/
     _view.push_back(vstage);
core/predictor/framework/server.cpp

@@ -28,6 +28,7 @@
 #include "core/predictor/framework/manager.h"
 #include "core/predictor/framework/resource.h"
 #include "core/predictor/framework/service_manager.h"
+#define BLOG(fmt, ...) printf("[%s:%s]:%d "fmt"\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)
 
 namespace baidu {
 namespace paddle_serving {

@@ -85,6 +86,7 @@ int ServerManager::start_and_wait() {
   boost::unordered_map<std::string, Service*>::iterator it;
   for (it = _format_services.begin(); it != _format_services.end(); it++) {
+    BLOG("\n\nservice name: %s", it->first.c_str());
     if (_server.AddService(it->second, brpc::SERVER_DOESNT_OWN_SERVICE) != 0) {
       LOG(ERROR) << "Failed to add service of format:" << it->first << "!";
       return -1;
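The BLOG macro added here (and in op.h, pdserving.cpp, and workflow.cpp, where clang-format splits it across lines) is a printf wrapper that prefixes every message with file, function, and line. A standalone demonstration of the idiom; note that ##__VA_ARGS__ is the GNU comma-swallowing extension this definition relies on when no varargs are passed:

#include <cstdio>

// Same shape as the commit's BLOG: string-literal pasting around fmt,
// plus ##__VA_ARGS__ so a bare format string is also accepted.
#define BLOG(fmt, ...) \
  printf("[%s:%s]:%d " fmt "\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)

int main() {
  BLOG("server starting");                    // no varargs
  BLOG("service name: %s", "general_model");  // with varargs
  return 0;
}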
core/predictor/framework/service.cpp

@@ -139,6 +139,7 @@ const std::string& InferService::name() const { return _infer_service_format; }
 int InferService::inference(const google::protobuf::Message* request,
                             google::protobuf::Message* response,
                             butil::IOBufBuilder* debug_os) {
+  BLOG("\n=====> start to inference");
   TRACEPRINTF("start to inference");
   // when funtion call begins, framework will reset
   // thread local variables&resources automatically.
core/predictor/framework/workflow.cpp

@@ -16,7 +16,9 @@
 #include <string>
 #include "core/predictor/common/inner_common.h"
 #include "core/predictor/framework/predictor_metric.h"  // PredictorMetric
-#define BLOG(fmt, ...) printf("[%s:%s]:%d "fmt"\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)
+#define BLOG(fmt, ...) \
+  printf(              \
+      "[%s:%s]:%d " fmt "\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)
 
 namespace baidu {
 namespace paddle_serving {
core/predictor/op/op.h

@@ -19,6 +19,9 @@
 #include "core/predictor/framework/channel.h"
 #include "core/predictor/framework/op_repository.h"
 #include "core/predictor/framework/predictor_metric.h"  // PredictorMetric
+#include <cstdlib>
+#define BLOG(fmt, ...) printf("[%s:%s]:%d "fmt"\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)
+#include<stdexcept>
 
 namespace baidu {
 namespace paddle_serving {

@@ -94,10 +97,6 @@ class Op {
   template <typename T>
   T* mutable_data() {
     Channel* channel = mutable_channel();
-    LOG(INFO) << "succ to get channel!";
-    auto x = (dynamic_cast<OpChannel<T>*>(channel))->data();
-    LOG(INFO) << "succ to x!";
-    return x;
+    return (dynamic_cast<OpChannel<T>*>(channel))->data();
   }

@@ -136,7 +135,7 @@ class Op {
   const std::string& full_name() const { return _full_name; }
-  //const std::string& pre_name() const { return _pre_node_name; }
+  // const std::string& pre_name() const { return _pre_node_name; }
   const std::vector<std::string>& pre_names() const { return _pre_node_names; }
 
   void set_full_name(const std::string full_name) { _full_name = full_name; }

@@ -207,7 +206,7 @@ class Op {
   Bus* _bus;
   Dag* _dag;
   uint32_t _id;
-  //std::string _pre_node_name; // only for sequential execution
+  // std::string _pre_node_name;  // only for sequential execution
   std::vector<std::string> _pre_node_names;  // for dag execution
   std::string _name;
   std::string _full_name;  // service_workflow_stageindex_opname

@@ -231,20 +230,19 @@ class OpWithChannel : public Op {
   // ---------- Implements ----------
 
   Channel* mutable_channel() {
-    LOG(INFO) << "op->mutable_data";
     if (_channel != NULL) {
-      LOG(INFO) << "op->mutable_data: return _channel";
+      LOG(INFO) << "op->mutable_data: _channel != NULL";
       return _channel;
     }
 
-    LOG(INFO) << "op->mutable_data: _channel == NULL";
-    _channel = butil::get_object<ChannelType>();
+    LOG(INFO) << "try to get_object: _channel";
+    //_channel = butil::get_object<ChannelType>();
+    //LOG(INFO) << butil::describe_objects<ChannelType>();
+    _channel = new ChannelType();
     if (!_channel) {
-      LOG(INFO) << "op->mutable_data: fail to get _channel";
       LOG(ERROR) << "Failed mutable channel of type:" << typeid(T).name();
       return NULL;
     }
-    LOG(INFO) << "op->mutable_data: succ to get _channel";
     _channel->init(this->id(), this->name());
     return _channel;
   }

@@ -252,10 +250,15 @@ class OpWithChannel : public Op {
   const Channel* get_channel() const { return _channel; }
 
   int release_channel() {
+    LOG(INFO) << "=====> _chaneel deinit";
     if (_channel) {
       _channel->deinit();
-      butil::return_object<ChannelType>(_channel);
+      delete _channel;
     }
+    /*if (_channel) {*/
+    //_channel->deinit();
+    //butil::return_object<ChannelType>(_channel);
+    /*}*/
 
     _channel = NULL;
     return 0;
core/predictor/src/pdserving.cpp

@@ -32,6 +32,7 @@
 #include "core/predictor/framework/server.h"
 #include "core/predictor/framework/service.h"
 #include "core/predictor/framework/workflow.h"
+#define BLOG(fmt, ...) printf("[%s:%s]:%d "fmt"\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)
 
 using baidu::paddle_serving::predictor::ServerManager;
 using baidu::paddle_serving::predictor::WorkflowManager;

@@ -217,6 +218,7 @@ int main(int argc, char** argv) {
   FLAGS_stderrthreshold = 3;
 #endif
 
+  BLOG("\nServerManager::instance().start_and_wait()\n");
   if (ServerManager::instance().start_and_wait() != 0) {
     LOG(ERROR) << "Failed start server and wait!";
     return -1;
core/sdk-cpp/proto/general_model_service.proto

@@ -39,10 +39,15 @@ message Request {
 };
 
 message Response {
-  repeated FetchInst insts = 1;
+  repeated ModelOutput outputs = 1;
   repeated int64 profile_time = 2;
 };
 
+message ModelOutput {
+  repeated FetchInst insts = 1;
+  repeated int64 profile_time = 2;
+}
+
 service GeneralModelService {
   rpc inference(Request) returns (Response);
   rpc debug(Request) returns (Response);
ensemble-demo/client.py

@@ -15,6 +15,7 @@
 from paddle_serving_client import Client
 from imdb_reader import IMDBDataset
 import sys
+import time
 
 client = Client()
 client.load_client_config('imdb_bow_client_conf/serving_client_conf.prototxt')

@@ -26,12 +27,13 @@ client.connect(["127.0.0.1:9393"])
 imdb_dataset = IMDBDataset()
 imdb_dataset.load_resource('imdb.vocab')
 
-for i in range(40):
+for i in range(500):
     line = 'i am very sad | 0'
     word_ids, label = imdb_dataset.get_words_and_label(line)
     feed = {"words": word_ids}
     fetch = ["acc", "cost", "prediction"]
     fetch_map = client.predict(feed=feed, fetch=fetch)
     print("{} {}".format(i, fetch_map["prediction"][1]))
-    exit(0)
+    # time.sleep(1)
+# exit(0)
 
 print('0.633530199528')
ensemble-demo/server.py

@@ -36,7 +36,8 @@ op_seq_maker.add_op(response_op, dependent_nodes=[add_op])
 server = Server()
 server.set_op_sequence(op_seq_maker.get_op_sequence())
 # server.load_model_config(sys.argv[1])
-model_configs = {'g1': 'imdb_bow_model', 'g2': 'imdb_cnn_model'}
+model_configs = {'g1': 'imdb_bow_model', 'g2': 'imdb_bow_model'}
+# model_configs = {'g1': 'imdb_bow_model', 'g2': 'imdb_cnn_model'}
 server.load_model_config(model_configs)
 server.prepare_server(workdir="work_dir1", port=9393, device="cpu")
 server.run_server()