PaddlePaddle / Serving: Commit fcd66ef4
Authored on Jan 05, 2021 by wangjiawei04

Merge remote-tracking branch 'root/dygraph' into HEAD

Parents: 7ea59bda 695a7772
Showing 16 changed files with 361 additions and 1274 deletions (+361 / -1274)
cmake/external/boost.cmake                                          +2    -2
cmake/external/brpc.cmake                                           +8    -4
cmake/paddlepaddle.cmake                                            +3    -3
core/configure/CMakeLists.txt                                       +10   -14
core/general-server/op/general_dist_kv_infer_op.cpp                 +0    -136
core/general-server/op/general_dist_kv_quant_infer_op.cpp           +0    -15
core/general-server/op/general_infer_op.cpp                         +2    -36
core/general-server/op/general_reader_op.cpp                        +39   -126
core/general-server/op/general_response_op.cpp                      +36   -141
core/predictor/CMakeLists.txt                                       +2    -3
core/predictor/framework/infer.h                                    +171  -90
paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h    +42   -309
paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp      +0    -22
paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h    +45   -350
paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp      +0    -22
python/examples/fit_a_line/local_train.py                           +1    -1
cmake/external/boost.cmake
@@ -22,8 +22,8 @@ set(BOOST_PROJECT "extern_boost")
 # version of boost, say, 1.66.0, doesn't build on CentOS 6. We
 # checked that the devtools package of CentOS 6 installs boost 1.41.0.
 # So we use 1.41.0 here.
-set(BOOST_VER "1.41.0")
-set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE)
+set(BOOST_VER "1.74.0")
+set(BOOST_TAR "boost_1_74_0" CACHE STRING "" FORCE)
 set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
 MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")
cmake/external/brpc.cmake
@@ -13,6 +13,9 @@
 # limitations under the License.

 INCLUDE(ExternalProject)
+set(BRPC_CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-narrowing")
+set(BRPC_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
+set(BRPC_CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -Wno-narrowing")

 find_package(OpenSSL REQUIRED)
@@ -40,14 +43,15 @@ ExternalProject_Add(
     extern_brpc
     ${EXTERNAL_PROJECT_LOG_ARGS}
     # TODO(gongwb): change to de newst repo when they changed.
-    GIT_REPOSITORY "https://github.com/wangjiawei04/brpc"
-    GIT_TAG "6d79e0b17f25107c35b705ea58d888083f59ff47"
+    GIT_REPOSITORY "https://github.com/apache/incubator-brpc"
+    GIT_TAG "master"
     PREFIX ${BRPC_SOURCES_DIR}
     UPDATE_COMMAND ""
     CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+    -DCMAKE_CXX_FLAGS=${BRPC_CMAKE_CXX_FLAGS}
+    -DCMAKE_C_FLAGS=${BRPC_CMAKE_C_FLAGS}
+    -DCMAKE_CPP_FLAGS=${BRPC_CMAKE_CPP_FLAGS}
     -DCMAKE_INSTALL_PREFIX=${BRPC_INSTALL_DIR}
     -DCMAKE_INSTALL_LIBDIR=${BRPC_INSTALL_DIR}/lib
     -DCMAKE_POSITION_INDEPENDENT_CODE=ON
cmake/paddlepaddle.cmake
@@ -31,11 +31,11 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "1.8.4")
+SET(PADDLE_VERSION "2.0.0-rc1")
 if (WITH_GPU)
   if (WITH_TRT)
-    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6")
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10.1-cudnn7-avx-mkl-trt6")
   else()
     SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
   endif()
@@ -51,7 +51,7 @@ else()
   endif()
 endif()

-SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/fluid_inference.tgz")
+SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
 MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}")
 if (WITH_GPU OR WITH_MKLML)
   if (WITH_TRT)
core/configure/CMakeLists.txt
@@ -14,10 +14,6 @@ list(APPEND configure_srcs ${CMAKE_CURRENT_LIST_DIR}/src/configure_parser.cpp)
 add_library(configure ${configure_srcs})
 add_dependencies(configure brpc)

-add_executable(test_configure
-               ${CMAKE_CURRENT_LIST_DIR}/tests/test_configure.cpp)
-target_link_libraries(test_configure configure protobuf)
-
 install(TARGETS configure
         ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
@@ -45,19 +41,19 @@ add_custom_target(sdk_configure_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E to
 add_dependencies(sdk_configure_py_proto sdk_configure_py_proto_init)
 add_custom_command(TARGET sdk_configure_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
-        COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
+        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
         COMMENT "Copy generated python proto into directory paddle_serving_client/proto."
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 add_custom_command(TARGET general_model_config_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
-        COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
+        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
         COMMENT "Copy generated general_model_config proto file into directory paddle_serving_client/proto."
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
-        COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
+        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
         COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_client/proto."
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 endif()
@@ -65,7 +61,7 @@ endif()
 if (APP)
 add_custom_command(TARGET general_model_config_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
-        COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
+        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
         COMMENT "Copy generated general_model_config proto file into directory paddle_serving_app/proto."
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 endif()
@@ -77,26 +73,26 @@ add_dependencies(server_config_py_proto server_config_py_proto_init)
 if (NOT WITH_GPU)
 add_custom_command(TARGET server_config_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
-        COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
+        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
         COMMENT "Copy generated python proto into directory paddle_serving_server/proto."
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINRARY_DIR})
 add_custom_command(TARGET general_model_config_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
-        COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
+        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
         COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto."
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
-        COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
+        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
         COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server/proto."
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 else()
 add_custom_command(TARGET server_config_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
-        COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
+        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
         COMMENT "Copy generated python proto into directory paddle_serving_server_gpu/proto."
@@ -105,7 +101,7 @@ add_custom_command(TARGET server_config_py_proto POST_BUILD
 add_custom_command(TARGET general_model_config_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
-        COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
+        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
         COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server_gpu/proto."
@@ -113,7 +109,7 @@ add_custom_command(TARGET general_model_config_py_proto POST_BUILD
 add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
-        COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
+        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
         COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server_gpu/proto."
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 endif()
core/general-server/op/general_dist_kv_infer_op.cpp
@@ -39,142 +39,6 @@ using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;

-int GeneralDistKVInferOp::inference() {
-  VLOG(2) << "Going to run inference";
-  const std::vector<std::string> pre_node_names = pre_names();
-  if (pre_node_names.size() != 1) {
-    LOG(ERROR) << "This op(" << op_name()
-               << ") can only have one predecessor op, but received "
-               << pre_node_names.size();
-    return -1;
-  }
-  const std::string pre_name = pre_node_names[0];
-
-  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
-  uint64_t log_id = input_blob->GetLogId();
-  VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
-  GeneralBlob *output_blob = mutable_data<GeneralBlob>();
-
-  if (!input_blob) {
-    LOG(ERROR) << "(logid=" << log_id
-               << ") Failed mutable depended argument, op:" << pre_name;
-    return -1;
-  }
-
-  const TensorVector *in = &input_blob->tensor_vector;
-  TensorVector *out = &output_blob->tensor_vector;
-  int batch_size = input_blob->GetBatchSize();
-  VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
-  std::vector<uint64_t> keys;
-  std::vector<rec::mcube::CubeValue> values;
-  int sparse_count = 0;
-  int dense_count = 0;
-  std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs;
-  size_t key_len = 0;
-  for (size_t i = 0; i < in->size(); ++i) {
-    if (in->at(i).dtype != paddle::PaddleDType::INT64) {
-      ++dense_count;
-      continue;
-    }
-    ++sparse_count;
-    size_t elem_num = 1;
-    for (size_t s = 0; s < in->at(i).shape.size(); ++s) {
-      elem_num *= in->at(i).shape[s];
-    }
-    key_len += elem_num;
-    int64_t *data_ptr = static_cast<int64_t *>(in->at(i).data.data());
-    dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num));
-  }
-  keys.resize(key_len);
-  int key_idx = 0;
-  for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) {
-    std::copy(dataptr_size_pairs[i].first,
-              dataptr_size_pairs[i].first + dataptr_size_pairs[i].second,
-              keys.begin() + key_idx);
-    key_idx += dataptr_size_pairs[i].second;
-  }
-  Timer timeline;
-  int64_t cube_start = timeline.TimeStampUS();
-  timeline.Start();
-  rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
-  std::vector<std::string> table_names = cube->get_table_names();
-  if (table_names.size() == 0) {
-    LOG(ERROR) << "(logid=" << log_id
-               << ") cube init error or cube config not given.";
-    return -1;
-  }
-  int ret = cube->seek(table_names[0], keys, &values);
-  int64_t cube_end = timeline.TimeStampUS();
-  if (values.size() != keys.size() || values[0].buff.size() == 0) {
-    LOG(ERROR) << "(logid=" << log_id << ") cube value return null";
-  }
-  size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
-  TensorVector sparse_out;
-  sparse_out.resize(sparse_count);
-  TensorVector dense_out;
-  dense_out.resize(dense_count);
-  int cube_val_idx = 0;
-  int sparse_idx = 0;
-  int dense_idx = 0;
-  std::unordered_map<int, int> in_out_map;
-  baidu::paddle_serving::predictor::Resource &resource =
-      baidu::paddle_serving::predictor::Resource::instance();
-  std::shared_ptr<PaddleGeneralModelConfig> model_config =
-      resource.get_general_model_config();
-  for (size_t i = 0; i < in->size(); ++i) {
-    if (in->at(i).dtype != paddle::PaddleDType::INT64) {
-      dense_out[dense_idx] = in->at(i);
-      ++dense_idx;
-      continue;
-    }
-    sparse_out[sparse_idx].lod.resize(in->at(i).lod.size());
-    for (size_t x = 0; x < sparse_out[sparse_idx].lod.size(); ++x) {
-      sparse_out[sparse_idx].lod[x].resize(in->at(i).lod[x].size());
-      std::copy(in->at(i).lod[x].begin(),
-                in->at(i).lod[x].end(),
-                sparse_out[sparse_idx].lod[x].begin());
-    }
-    sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32;
-    sparse_out[sparse_idx].shape.push_back(sparse_out[sparse_idx].lod[0].back());
-    sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
-    sparse_out[sparse_idx].name = model_config->_feed_name[i];
-    sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
-                                       EMBEDDING_SIZE * sizeof(float));
-    float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data());
-    for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) {
-      float *data_ptr = dst_ptr + x * EMBEDDING_SIZE;
-      memcpy(data_ptr,
-             values[cube_val_idx].buff.data(),
-             values[cube_val_idx].buff.size());
-      cube_val_idx++;
-    }
-    ++sparse_idx;
-  }
-  TensorVector infer_in;
-  infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end());
-  infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
-  output_blob->SetBatchSize(batch_size);
-  output_blob->SetLogId(log_id);
-  VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
-  int64_t start = timeline.TimeStampUS();
-  if (InferManager::instance().infer(
-          engine_name().c_str(), &infer_in, out, batch_size)) {
-    LOG(ERROR) << "(logid=" << log_id
-               << ") Failed do infer in fluid model: " << engine_name();
-    return -1;
-  }
-  int64_t end = timeline.TimeStampUS();
-  CopyBlobInfo(input_blob, output_blob);
-  AddBlobInfo(output_blob, cube_start);
-  AddBlobInfo(output_blob, cube_end);
-  AddBlobInfo(output_blob, start);
-  AddBlobInfo(output_blob, end);
-  return 0;
-}
 DEFINE_OP(GeneralDistKVInferOp);
core/general-server/op/general_dist_kv_quant_infer_op.cpp
@@ -188,21 +188,6 @@ int GeneralDistKVQuantInferOp::inference() {
   VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;

-  Timer timeline;
-  int64_t start = timeline.TimeStampUS();
-  timeline.Start();
-
-  if (InferManager::instance().infer(
-          engine_name().c_str(), &infer_in, out, batch_size)) {
-    LOG(ERROR) << "(logid=" << log_id
-               << ") Failed do infer in fluid model: " << engine_name();
-    return -1;
-  }
-
-  int64_t end = timeline.TimeStampUS();
-  CopyBlobInfo(input_blob, output_blob);
-  AddBlobInfo(output_blob, start);
-  AddBlobInfo(output_blob, end);
   return 0;
 }
 DEFINE_OP(GeneralDistKVQuantInferOp);
core/general-server/op/general_infer_op.cpp
@@ -44,45 +44,11 @@ int GeneralInferOp::inference() {
                << pre_node_names.size();
     return -1;
   }
   const std::string pre_name = pre_node_names[0];

-  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
-  uint64_t log_id = input_blob->GetLogId();
-  VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
-  GeneralBlob *output_blob = mutable_data<GeneralBlob>();
-  output_blob->SetLogId(log_id);
-
-  if (!input_blob) {
-    LOG(ERROR) << "(logid=" << log_id
-               << ") Failed mutable depended argument, op:" << pre_name;
-    return -1;
-  }
-
-  const TensorVector *in = &input_blob->tensor_vector;
-  TensorVector *out = &output_blob->tensor_vector;
-
-  int batch_size = input_blob->_batch_size;
-  VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
-  output_blob->_batch_size = batch_size;
-  VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
-
-  Timer timeline;
-  int64_t start = timeline.TimeStampUS();
-  timeline.Start();
-
-  if (InferManager::instance().infer(
-          engine_name().c_str(), in, out, batch_size)) {
-    LOG(ERROR) << "(logid=" << log_id
-               << ") Failed do infer in fluid model: " << engine_name().c_str();
+  if (InferManager::instance().infer(engine_name().c_str())) {
     return -1;
   }
-
-  int64_t end = timeline.TimeStampUS();
-  CopyBlobInfo(input_blob, output_blob);
-  AddBlobInfo(output_blob, start);
-  AddBlobInfo(output_blob, end);
+  std::cout << "Infer Success" << std::endl;
   return 0;
 }
 DEFINE_OP(GeneralInferOp);
core/general-server/op/general_reader_op.cpp
@@ -20,6 +20,7 @@
 #include "core/general-server/op/general_infer_helper.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
 #include "core/predictor/framework/resource.h"
 #include "core/util/include/timer.h"

 namespace baidu {
@@ -32,6 +33,7 @@ using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::FeedInst;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 using baidu::paddle_serving::predictor::InferManager;

 int conf_check(const Request *req,
                const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
@@ -71,75 +73,34 @@ int conf_check(const Request *req,
 int GeneralReaderOp::inference() {
   // reade request from client
   const Request *req = dynamic_cast<const Request *>(get_request_message());
   // TODO: only support one engine here
   std::string engine_name = "general_infer_0";
   const Request *req = dynamic_cast<const Request *>(get_request_message());
   uint64_t log_id = req->log_id();
   int input_var_num = 0;
   std::vector<int64_t> elem_type;
   std::vector<int64_t> elem_size;
   std::vector<int64_t> capacity;
   GeneralBlob *res = mutable_data<GeneralBlob>();
   TensorVector *out = &res->tensor_vector;
   res->SetLogId(log_id);
   if (!res) {
     LOG(ERROR) << "(logid=" << log_id
                << ") Failed get op tls reader object output";
   }
   Timer timeline;
   int64_t start = timeline.TimeStampUS();
   int var_num = req->insts(0).tensor_array_size();
   VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
   VLOG(2) << "(logid=" << log_id
           << ") start to call load general model_conf op";
   baidu::paddle_serving::predictor::Resource &resource =
       baidu::paddle_serving::predictor::Resource::instance();
   VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
   std::shared_ptr<PaddleGeneralModelConfig> model_config =
       resource.get_general_model_config();
   VLOG(2) << "(logid=" << log_id << ") print general model config done.";
   // TODO(guru4elephant): how to do conditional check?
   /*
   int ret = conf_check(req, model_config);
   if (ret != 0) {
     LOG(ERROR) << "model conf of server:";
     resource.print_general_model_config(model_config);
     return 0;
   }
   */
   // package tensor
   elem_type.resize(var_num);
   elem_size.resize(var_num);
   capacity.resize(var_num);
   // prepare basic information for input
   for (int i = 0; i < var_num; ++i) {
     paddle::PaddleTensor lod_tensor;
     elem_type[i] = req->insts(0).tensor_array(i).elem_type();
     VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
     if (elem_type[i] == 0) {  // int64
       elem_size[i] = sizeof(int64_t);
       lod_tensor.dtype = paddle::PaddleDType::INT64;
     } else if (elem_type[i] == 1) {
       elem_size[i] = sizeof(float);
       lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
     } else if (elem_type[i] == 2) {
       elem_size[i] = sizeof(int32_t);
       lod_tensor.dtype = paddle::PaddleDType::INT32;
     }
     // implement lod tensor here
     std::string tensor_name = model_config->_feed_name[i];
     std::cout << "START Tensor Name: " << tensor_name << std::endl;
     auto lod_tensor = InferManager::instance().GetInputHandle(
         engine_name.c_str(), tensor_name.c_str());
     std::cout << "PICK lod tensor. " << std::endl;
     std::vector<std::vector<size_t>> lod;
     std::vector<int> shape;
     // get lod info here
     if (req->insts(0).tensor_array(i).lod_size() > 0) {
       VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
       lod_tensor.lod.resize(1);
       lod.resize(1);
       for (int k = 0; k < req->insts(0).tensor_array(i).lod_size(); ++k) {
         lod_tensor.lod[0].push_back(req->insts(0).tensor_array(i).lod(k));
         lod[0].push_back(req->insts(0).tensor_array(i).lod(k));
       }
       capacity[i] = 1;
       for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
@@ -147,103 +108,55 @@ int GeneralReaderOp::inference() {
         VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
                 << "]: " << dim;
         capacity[i] *= dim;
         lod_tensor.shape.push_back(dim);
         shape.push_back(dim);
       }
       VLOG(2) << "(logid=" << log_id << ") var[" << i
               << "] is tensor, capacity: " << capacity[i];
     } else {
     } else {
       capacity[i] = 1;
       for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
         int dim = req->insts(0).tensor_array(i).shape(k);
         VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
                 << "]: " << dim;
         capacity[i] *= dim;
         lod_tensor.shape.push_back(dim);
         shape.push_back(dim);
       }
       VLOG(2) << "(logid=" << log_id << ") var[" << i
               << "] is tensor, capacity: " << capacity[i];
     }
     lod_tensor.name = model_config->_feed_name[i];
     out->push_back(lod_tensor);
   }
   // specify the memory needed for output tensor_vector
   for (int i = 0; i < var_num; ++i) {
     if (out->at(i).lod.size() == 1) {
       int tensor_size = 0;
       const Tensor &tensor = req->insts(0).tensor_array(i);
       int data_len = 0;
       if (tensor.int64_data_size() > 0) {
         data_len = tensor.int64_data_size();
       } else if (tensor.float_data_size() > 0) {
         data_len = tensor.float_data_size();
       } else if (tensor.int_data_size() > 0) {
         data_len = tensor.int_data_size();
       }
       VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
               << "]: " << data_len;
       tensor_size += data_len;
       int cur_len = out->at(i).lod[0].back();
       VLOG(2) << "(logid=" << log_id << ") current len: " << cur_len;
       int sample_len = 0;
       if (tensor.shape_size() == 1) {
         sample_len = data_len;
       } else {
         sample_len = tensor.shape(0);
       }
       VLOG(2) << "(logid=" << log_id << ") new len: " << cur_len + sample_len;
       out->at(i).data.Resize(tensor_size * elem_size[i]);
       VLOG(2) << "(logid=" << log_id << ") var[" << i
               << "] is lod_tensor and len=" << out->at(i).lod[0].back();
     } else {
       out->at(i).data.Resize(capacity[i] * elem_size[i]);
       VLOG(2) << "(logid=" << log_id << ") var[" << i
               << "] is tensor and capacity=" << capacity[i];
     }
   }
   // fill the data into output general_blob
   for (int i = 0; i < var_num; ++i) {
     if (elem_type[i] == 0) {
       int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
       VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
               << "] is " << req->insts(0).tensor_array(i).int64_data(0);
       int offset = 0;
       lod_tensor->SetLoD(lod);
       lod_tensor->Reshape(shape);
       std::cout << "FINI Set Lod and Reshape, and elem type: " << elem_type[i]
                 << std::endl;
       // insert data here
       if (req->insts(0).tensor_array(i).elem_type() == 0) {
         // TODO: Copy twice here, can optimize
         int elem_num = req->insts(0).tensor_array(i).int64_data_size();
         std::vector<int64_t> data(elem_num);
         int64_t *dst_ptr = data.data();
         for (int k = 0; k < elem_num; ++k) {
           dst_ptr[offset + k] = req->insts(0).tensor_array(i).int64_data(k);
           dst_ptr[k] = req->insts(0).tensor_array(i).int64_data(k);
         }
       } else if (elem_type[i] == 1) {
         float *dst_ptr = static_cast<float *>(out->at(i).data.data());
         VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
                 << "] is " << req->insts(0).tensor_array(i).float_data(0);
         int offset = 0;
         lod_tensor->CopyFromCpu(dst_ptr);
       } else if (req->insts(0).tensor_array(i).elem_type() == 1) {
         int elem_num = req->insts(0).tensor_array(i).float_data_size();
         std::vector<float> data(elem_num);
         float *dst_ptr = data.data();
         for (int k = 0; k < elem_num; ++k) {
           dst_ptr[offset + k] = req->insts(0).tensor_array(i).float_data(k);
           dst_ptr[k] = req->insts(0).tensor_array(i).float_data(k);
         }
       } else if (elem_type[i] == 2) {
         int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
         VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
                 << "] is " << req->insts(0).tensor_array(i).int_data(0);
         int offset = 0;
         lod_tensor->CopyFromCpu(dst_ptr);
       } else if (req->insts(0).tensor_array(i).elem_type() == 2) {
         int elem_num = req->insts(0).tensor_array(i).int_data_size();
         std::vector<int32_t> data(elem_num);
         int32_t *dst_ptr = data.data();
         for (int k = 0; k < elem_num; ++k) {
           dst_ptr[offset + k] = req->insts(0).tensor_array(i).int_data(k);
           dst_ptr[k] = req->insts(0).tensor_array(i).int_data(k);
         }
         lod_tensor->CopyFromCpu(dst_ptr);
       }
       std::cout << "FINISH Tensor Name: " << tensor_name << std::endl;
   }
   VLOG(2) << "(logid=" << log_id << ") output size: " << out->size();
   timeline.Pause();
   int64_t end = timeline.TimeStampUS();
   res->p_size = 0;
   res->_batch_size = 1;
   AddBlobInfo(res, start);
   AddBlobInfo(res, end);
   VLOG(2) << "(logid=" << log_id << ") read data from client success";
   return 0;
 }
 DEFINE_OP(GeneralReaderOp);
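The rewritten reader logic above stops packing a GeneralBlob TensorVector and instead pushes request data straight into the engine's input handles. A minimal sketch of that handle-based feed pattern, written directly against paddle_infer::Predictor rather than through InferManager; the function name, shape, and lod arguments here are illustrative, not taken from the diff:

#include <string>
#include <vector>
#include "paddle_inference_api.h"  // paddle_infer::Predictor / paddle_infer::Tensor

// Sketch only: mirror of the GetInputHandle -> SetLoD -> Reshape -> CopyFromCpu
// sequence the new GeneralReaderOp uses, for a single int64 feed variable.
void FeedInt64(paddle_infer::Predictor* predictor,
               const std::string& tensor_name,
               const std::vector<int64_t>& data,
               const std::vector<int>& shape,
               const std::vector<std::vector<size_t>>& lod) {
  auto handle = predictor->GetInputHandle(tensor_name);
  if (!lod.empty()) {
    handle->SetLoD(lod);  // only needed for lod tensors
  }
  handle->Reshape(shape);
  handle->CopyFromCpu(data.data());
}

Inside the op, the same handle is obtained through InferManager::instance().GetInputHandle(engine_name, tensor_name), as shown in the diff above.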
core/general-server/op/general_response_op.cpp
@@ -40,160 +40,55 @@ using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;

 int GeneralResponseOp::inference() {
   const std::vector<std::string> pre_node_names = pre_names();
   VLOG(2) << "pre node names size: " << pre_node_names.size();
   const GeneralBlob *input_blob;
   uint64_t log_id =
       get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
   const Request *req = dynamic_cast<const Request *>(get_request_message());
   // response inst with only fetch_var_names
   Response *res = mutable_data<Response>();
   Timer timeline;
   // double response_time = 0.0;
   // timeline.Start();
   int64_t start = timeline.TimeStampUS();
   VLOG(2) << "(logid=" << log_id
           << ") start to call load general model_conf op";
   baidu::paddle_serving::predictor::Resource &resource =
       baidu::paddle_serving::predictor::Resource::instance();
   VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
   std::shared_ptr<PaddleGeneralModelConfig> model_config =
       resource.get_general_model_config();
   VLOG(2) << "(logid=" << log_id
           << ") max body size : " << brpc::fLU64::FLAGS_max_body_size;
   std::vector<int> fetch_index;
   fetch_index.resize(req->fetch_var_names_size());
   std::vector<int> capacity(req->fetch_var_names_size(), 1);
   std::string engine_name = "general_infer_0";
   ModelOutput *output = res->add_outputs();
   FetchInst *fetch_inst = output->add_insts();
   FetchInst *fetch_p = output->mutable_insts(0);
   std::vector<std::string> outs =
       InferManager::instance().GetOutputNames(engine_name.c_str());
   for (int i = 0; i < req->fetch_var_names_size(); ++i) {
     fetch_index[i] =
         model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
   }
   for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
     const std::string &pre_name = pre_node_names[pi];
     VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
             << " (" << pre_node_names.size() << ")";
     input_blob = get_depend_argument<GeneralBlob>(pre_name);
     // fprintf(stderr, "input(%s) blob address %x\n", pre_names.c_str(),
     // input_blob);
     if (!input_blob) {
       LOG(ERROR) << "(logid=" << log_id
                  << ") Failed mutable depended argument, op: " << pre_name;
       return -1;
     Tensor *tensor = fetch_inst->add_tensor_array();
     std::string tensor_name = outs[i];
     auto lod_tensor = InferManager::instance().GetOutputHandle(
         engine_name.c_str(), tensor_name.c_str());
     std::vector<int> shape = lod_tensor->shape();
     for (int k = 0; k < shape.size(); ++k) {
       capacity[i] *= shape[k];
       tensor->add_shape(shape[k]);
     }
     const TensorVector *in = &input_blob->tensor_vector;
     ModelOutput *output = res->add_outputs();
     // To get the order of model return values
     output->set_engine_name(pre_name);
     FetchInst *fetch_inst = output->add_insts();
     for (auto &idx : fetch_index) {
       Tensor *tensor = fetch_inst->add_tensor_array();
       if (model_config->_is_lod_fetch[idx]) {
         VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
                 << model_config->_fetch_name[idx] << " is lod_tensor";
         for (int k = 0; k < in->at(idx).shape.size(); ++k) {
           VLOG(2) << "(logid=" << log_id << ") shape[" << k
                   << "]: " << in->at(idx).shape[k];
           tensor->add_shape(in->at(idx).shape[k]);
         }
       } else {
         VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
                 << model_config->_fetch_name[idx] << " is tensor";
         for (int k = 0; k < in->at(idx).shape.size(); ++k) {
           VLOG(2) << "(logid=" << log_id << ") shape[" << k
                   << "]: " << in->at(idx).shape[k];
           tensor->add_shape(in->at(idx).shape[k]);
         }
       }
     auto dtype = lod_tensor->type();
     if (dtype == paddle::PaddleDType::INT64) {
       std::vector<int64_t> datas(capacity[i]);
       int64_t *data_ptr = datas.data();
       lod_tensor->CopyToCpu(data_ptr);
       google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr,
                                                         data_ptr + capacity[i]);
       tensor->mutable_int64_data()->Swap(&tmp_data);
     } else if (dtype == paddle::PaddleDType::FLOAT32) {
       std::vector<float> datas(capacity[i]);
       float *data_ptr = datas.data();
       lod_tensor->CopyToCpu(data_ptr);
       google::protobuf::RepeatedField<float> tmp_data(data_ptr,
                                                       data_ptr + capacity[i]);
       tensor->mutable_float_data()->Swap(&tmp_data);
     } else if (dtype == paddle::PaddleDType::INT32) {
       std::vector<int32_t> datas(capacity[i]);
       int32_t *data_ptr = datas.data();
       lod_tensor->CopyToCpu(data_ptr);
       google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
                                                         data_ptr + capacity[i]);
       tensor->mutable_int_data()->Swap(&tmp_data);
     }
     int var_idx = 0;
     for (auto &idx : fetch_index) {
       int cap = 1;
       for (int j = 0; j < in->at(idx).shape.size(); ++j) {
         cap *= in->at(idx).shape[j];
     std::vector<std::vector<size_t>> lod = lod_tensor->lod();
     if (lod.size() > 0) {
       for (int j = 0; j < lod[0].size(); ++j) {
         tensor->add_lod(lod[0][j]);
       }
       FetchInst *fetch_p = output->mutable_insts(0);
       auto dtype = in->at(idx).dtype;
       if (dtype == paddle::PaddleDType::INT64) {
         VLOG(2) << "(logid=" << log_id << ") Prepare int64 var ["
                 << model_config->_fetch_name[idx] << "].";
         int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
         // from
         // https://stackoverflow.com/questions/15499641/copy-a-stdvector-to-a-repeated-field-from-protobuf-with-memcpy
         // `Swap` method is faster than `{}` method.
         google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr,
                                                           data_ptr + cap);
         fetch_p->mutable_tensor_array(var_idx)->mutable_int64_data()->Swap(
             &tmp_data);
       } else if (dtype == paddle::PaddleDType::FLOAT32) {
         VLOG(2) << "(logid=" << log_id << ") Prepare float var ["
                 << model_config->_fetch_name[idx] << "].";
         float *data_ptr = static_cast<float *>(in->at(idx).data.data());
         google::protobuf::RepeatedField<float> tmp_data(data_ptr,
                                                         data_ptr + cap);
         fetch_p->mutable_tensor_array(var_idx)->mutable_float_data()->Swap(
             &tmp_data);
       } else if (dtype == paddle::PaddleDType::INT32) {
         VLOG(2) << "(logid=" << log_id << ")Prepare int32 var ["
                 << model_config->_fetch_name[idx] << "].";
         int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data());
         google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
                                                           data_ptr + cap);
         fetch_p->mutable_tensor_array(var_idx)->mutable_int_data()->Swap(
             &tmp_data);
       }
       if (model_config->_is_lod_fetch[idx]) {
         if (in->at(idx).lod.size() > 0) {
           for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
             fetch_p->mutable_tensor_array(var_idx)->add_lod(
                 in->at(idx).lod[0][j]);
           }
         }
       }
       VLOG(2) << "(logid=" << log_id << ") fetch var ["
               << model_config->_fetch_name[idx] << "] ready";
       var_idx++;
     }
   }
   if (req->profile_server()) {
     int64_t end = timeline.TimeStampUS();
     // TODO(barriery): multi-model profile_time.
     // At present, only the response_op is multi-input, so here we get
     // the profile_time by hard coding. It needs to be replaced with
     // a more elegant way.
     for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
       input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
       VLOG(2) << "(logid=" << log_id
               << ") p size for input blob: " << input_blob->p_size;
       int profile_time_idx = -1;
       if (pi == 0) {
         profile_time_idx = 0;
       } else {
         profile_time_idx = input_blob->p_size - 2;
       }
       for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
         res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
       }
     }
     // TODO(guru4elephant): find more elegant way to do this
     res->add_profile_time(start);
     res->add_profile_time(end);
   }
   return 0;
 }
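The response op above now pulls results through output handles and CopyToCpu instead of reading the predecessor blob's TensorVector. A minimal sketch of that fetch pattern, again written directly against paddle_infer::Predictor; the function name and the float dtype are illustrative:

#include <functional>
#include <numeric>
#include <string>
#include <vector>
#include "paddle_inference_api.h"

// Sketch only: read an output handle's shape, size a host buffer from it, and
// copy the result back, as the rewritten GeneralResponseOp does per fetch var.
std::vector<float> FetchFloat(paddle_infer::Predictor* predictor,
                              const std::string& tensor_name) {
  auto handle = predictor->GetOutputHandle(tensor_name);
  std::vector<int> shape = handle->shape();
  int capacity = std::accumulate(
      shape.begin(), shape.end(), 1, std::multiplies<int>());
  std::vector<float> data(capacity);
  handle->CopyToCpu(data.data());
  return data;
}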
core/predictor/CMakeLists.txt
@@ -12,13 +12,12 @@ set_source_files_properties(
     ${pdserving_srcs}
     PROPERTIES
     COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
-add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure)
+add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure extern_paddle paddle_fluid)
 if (WITH_TRT)
     add_definitions(-DWITH_TRT)
 endif()
 target_link_libraries(pdserving
-    brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
+    brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_fluid ${paddle_depend_libs})

 # install
 install(TARGETS pdserving
     RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin
core/predictor/framework/infer.h
@@ -20,10 +20,9 @@
 #include <utility>
 #include <vector>
 #include "core/predictor/common/inner_common.h"
 #include "core/predictor/framework/bsf.h"
 #include "core/predictor/framework/factory.h"
 #include "core/predictor/framework/infer_data.h"
 #include "paddle_inference_api.h"  // NOLINT

 namespace baidu {
 namespace paddle_serving {
 namespace predictor {
@@ -105,8 +104,8 @@ class InferEngine {
   virtual int thrd_initialize() { return thrd_initialize_impl(); }
   virtual int thrd_clear() { return thrd_clear_impl(); }
   virtual int thrd_finalize() { return thrd_finalize_impl(); }
   virtual int infer(const void* in, void* out, uint32_t batch_size = -1) {
     return infer_impl1(in, out, batch_size);
   }
   virtual int infer() { return infer_impl(); }

   virtual int reload() = 0;
@@ -120,11 +119,11 @@ class InferEngine {
   virtual int thrd_finalize_impl() = 0;
   virtual int thrd_clear_impl() = 0;
   virtual int proc_finalize_impl() = 0;
   virtual int infer_impl1(const void* in, void* out, uint32_t batch_size = -1) = 0;
   virtual int infer_impl2(const BatchTensor& in, BatchTensor& out) = 0;  // NOLINT
   virtual std::vector<std::string> GetInputNames() = 0;
   virtual std::vector<std::string> GetOutputNames() = 0;
   virtual std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string& name) = 0;
   virtual std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const std::string& name) = 0;
   virtual int infer_impl() = 0;
   // end: framework inner call
 };
@@ -138,8 +137,6 @@ class ReloadableInferEngine : public InferEngine {
     uint64_t last_revision;
   };

   typedef im::bsf::Task<Tensor, Tensor> TaskT;

   virtual int load(const InferEngineCreationParams& params) = 0;

   int proc_initialize_impl(const configure::EngineDesc& conf, bool version) {
@@ -201,44 +198,11 @@ class ReloadableInferEngine : public InferEngine {
       LOG(ERROR) << "Failed proc initialize impl";
       return -1;
     }

     // init bsf framework
     if (_infer_thread_num <= 0) {
       return 0;
     }
     im::bsf::TaskExecutor<TaskT>::instance()->set_thread_init_fn(
         boost::bind(&InferEngine::thrd_initialize_impl, this));
     im::bsf::TaskExecutor<TaskT>::instance()->set_thread_reset_fn(
         boost::bind(&InferEngine::thrd_clear_impl, this));
     im::bsf::TaskExecutor<TaskT>::instance()->set_thread_callback_fn(
         boost::bind(&InferEngine::infer_impl2, this, _1, _2));
     im::bsf::TaskExecutor<TaskT>::instance()->set_batch_size(_infer_batch_size);
     im::bsf::TaskExecutor<TaskT>::instance()->set_batch_align(_infer_batch_align);
     if (im::bsf::TaskExecutor<TaskT>::instance()->start(_infer_thread_num) != 0) {
       LOG(ERROR) << "Failed start bsf executor, threads:" << _infer_thread_num;
       return -1;
     }
     LOG(WARNING) << "Enable batch schedule framework, thread_num:"
                  << _infer_thread_num << ", batch_size:" << _infer_batch_size
                  << ", enable_batch_align:" << _infer_batch_align;
     return 0;
   }

   int infer(const void* in, void* out, uint32_t batch_size = -1) {
     if (_infer_thread_num <= 0) {
       return infer_impl1(in, out, batch_size);
     }
     im::bsf::TaskManager<Tensor, Tensor> task_manager;
     task_manager.schedule(*(reinterpret_cast<const BatchTensor*>(in)),
                           *(reinterpret_cast<BatchTensor*>(out)));
     task_manager.wait();
     return 0;
   int infer() { return infer_impl(); }

   int thrd_initialize() {
@@ -263,10 +227,6 @@ class ReloadableInferEngine : public InferEngine {
       return -1;
     }

     if (_infer_thread_num > 0) {
       im::bsf::TaskExecutor<TaskT>::instance()->stop();
     }
     return 0;
   }
@@ -417,10 +377,6 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
   virtual int thrd_initialize_impl() {
     // memory pool to be inited in non-serving-threads
     if (MempoolWrapper::instance().thread_initialize() != 0) {
       LOG(ERROR) << "Failed thread initialize mempool";
       return -1;
     }

     ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
     if (!md || load_data(md, _infer_engine_params) != 0) {
@@ -430,17 +386,12 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
     }

     THREAD_SETSPECIFIC(_skey, md);
     im::bsf::AutoMutex lock(_mutex);
     _reload_vec.push_back(md);
     return 0;
   }

   int thrd_clear_impl() {
     // for non-serving-threads
     if (MempoolWrapper::instance().thread_clear() != 0) {
       LOG(ERROR) << "Failed thread clear mempool";
       return -1;
     }
     return 0;
   }
@@ -538,11 +489,6 @@ class CloneDBReloadableInferEngine
   }

   virtual int thrd_initialize_impl() {
     // memory pool to be inited in non-serving-threads
     if (MempoolWrapper::instance().thread_initialize() != 0) {
       LOG(ERROR) << "Failed thread initialize mempool";
       return -1;
     }

     ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
     if (!md || load_data(md, _pd->cores[_pd->current_idx]) != 0) {
@@ -552,7 +498,6 @@ class CloneDBReloadableInferEngine
     }

     THREAD_SETSPECIFIC(DBReloadableInferEngine<EngineCore>::_skey, md);
     im::bsf::AutoMutex lock(DBReloadableInferEngine<EngineCore>::_mutex);
     DBReloadableInferEngine<EngineCore>::_reload_vec.push_back(md);
     return 0;
   }
@@ -571,25 +516,51 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
  public:  // NOLINT
   FluidInferEngine() {}
   ~FluidInferEngine() {}
   std::vector<std::string> GetInputNames() {
     FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
     }
     return core->GetInputNames();
   }
   std::vector<std::string> GetOutputNames() {
     FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
     }
     return core->GetOutputNames();
   }
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string& name) {
     FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
     }
     return core->GetInputHandle(name);
   }
   std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const std::string& name) {
     FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetOutputHandle()";
     }
     return core->GetOutputHandle(name);
   }
   int infer_impl1(const void* in, void* out, uint32_t batch_size = -1) {
     FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
   int infer_impl() {
     FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in infer_impl()";
       return -1;
     }
     if (!core->Run(in, out)) {
     if (!core->Run()) {
       LOG(ERROR) << "Failed run fluid family core";
       return -1;
     }
     return 0;
   }
   int infer_impl2(const BatchTensor& in, BatchTensor& out) {  // NOLINT
     return infer_impl1(&in, &out);
   }
 };

 typedef FactoryPool<InferEngine> StaticInferFactory;
@@ -715,13 +686,43 @@ class VersionedInferEngine : public InferEngine {
     return _versions.begin()->second;
   }

   int infer(const void* in, void* out, uint32_t batch_size) {
   int infer() {
     InferEngine* engine = default_engine();
     if (!engine) {
       LOG(WARNING) << "fail to get default engine";
       return -1;
     }
     return engine->infer(in, out, batch_size);
     return engine->infer();
   }
   std::vector<std::string> GetInputNames() {
     InferEngine* engine = default_engine();
     if (!engine) {
       LOG(WARNING) << "fail to get default engine";
     }
     return engine->GetInputNames();
   }
   std::vector<std::string> GetOutputNames() {
     InferEngine* engine = default_engine();
     if (!engine) {
       LOG(WARNING) << "fail to get default engine";
     }
     return engine->GetOutputNames();
   }
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string& name) {
     InferEngine* engine = default_engine();
     if (!engine) {
       LOG(WARNING) << "fail to get default engine";
     }
     return engine->GetInputHandle(name);
   }
   std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const std::string& name) {
     InferEngine* engine = default_engine();
     if (!engine) {
       LOG(WARNING) << "fail to get default engine";
     }
     return engine->GetOutputHandle(name);
   }

   template <typename T>
@@ -740,14 +741,45 @@ class VersionedInferEngine : public InferEngine {
   }

   // versioned inference interface
   int infer(const void* in, void* out, uint32_t batch_size, uint64_t version) {
   int infer(uint64_t version) {
     auto iter = _versions.find(version);
     if (iter == _versions.end()) {
       LOG(ERROR) << "Not found version engine: " << version;
       return -1;
     }
     return iter->second->infer(in, out, batch_size);
     return iter->second->infer();
   }
   std::vector<std::string> GetInputNames(uint64_t version) {
     auto iter = _versions.find(version);
     if (iter == _versions.end()) {
       LOG(ERROR) << "Not found version engine: " << version;
     }
     return iter->second->GetInputNames();
   }
   std::vector<std::string> GetOutputNames(uint64_t version) {
     auto iter = _versions.find(version);
     if (iter == _versions.end()) {
       LOG(ERROR) << "Not found version engine: " << version;
     }
     return iter->second->GetOutputNames();
   }
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(uint64_t version, const std::string& name) {
     auto iter = _versions.find(version);
     if (iter == _versions.end()) {
       LOG(ERROR) << "Not found version engine: " << version;
     }
     return iter->second->GetInputHandle(name);
   }
   std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(uint64_t version, const std::string& name) {
     auto iter = _versions.find(version);
     if (iter == _versions.end()) {
       LOG(ERROR) << "Not found version engine: " << version;
     }
     return iter->second->GetOutputHandle(name);
   }

   template <typename T>
@@ -774,12 +806,9 @@ class VersionedInferEngine : public InferEngine {
   int thrd_finalize_impl() { return -1; }
   int thrd_clear_impl() { return -1; }
   int proc_finalize_impl() { return -1; }
   int infer_impl1(const void* in, void* out, uint32_t batch_size = -1) {
   int infer_impl() {
     return -1;
   }
   int infer_impl2(const BatchTensor& in, BatchTensor& out) {  // NOLINT
     return -1;
   }  // NOLINT

  private:
   boost::unordered_map<uint64_t, InferEngine*> _versions;
@@ -877,16 +906,42 @@ class InferManager {
   }

   // Inference interface
   int infer(const char* model_name, const void* in, void* out, uint32_t batch_size = -1) {
   int infer(const char* model_name) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
       LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
       return -1;
     }
     return it->second->infer(in, out, batch_size);
     return it->second->infer();
   }
   std::vector<std::string> GetInputNames(const char* model_name) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
       LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
     }
     return it->second->GetInputNames();
   }
   std::vector<std::string> GetOutputNames(const char* model_name) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
       LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
     }
     return it->second->GetOutputNames();
   }
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const char* model_name, const std::string& name) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
       LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
     }
     return it->second->GetInputHandle(name);
   }
   std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const char* model_name, const std::string& name) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
       LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
     }
     return it->second->GetOutputHandle(name);
   }

   template <typename T>
@@ -907,18 +962,44 @@ class InferManager {
   // Versioned inference interface
   int infer(const char* model_name, const void* in, void* out, uint32_t batch_size, uint64_t version) {
   int infer(const char* model_name, uint64_t version) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
       LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
       return -1;
     }
     return it->second->infer(in, out, batch_size, version);
     return it->second->infer(version);
   }
   std::vector<std::string> GetInputNames(const char* model_name, uint64_t version) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
       LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
     }
     return it->second->GetInputNames(version);
   }
   std::vector<std::string> GetOutputNames(const char* model_name, uint64_t version) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
       LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
     }
     return it->second->GetOutputNames(version);
   }
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const char* model_name, uint64_t version, const std::string& name) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
       LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
     }
     return it->second->GetInputHandle(version, name);
   }
   std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const char* model_name, uint64_t version, const std::string& name) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
       LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
     }
     return it->second->GetOutputHandle(version, name);
   }
   template <typename T>
   T* get_core(const char* model_name, uint64_t version) {
     auto it = _map.find(model_name);
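Taken together, the interface changes above replace infer(in, out, batch_size) with a tensor-free infer() plus name and handle accessors at every layer (InferEngine, VersionedInferEngine, InferManager). A sketch of how a caller would drive the reworked InferManager, assuming a single feed and a single fetch; the engine name matches the "general_infer_0" hard-coded in the reader and response ops, and error handling is reduced to a bool:

#include <string>
#include <vector>
#include "core/predictor/framework/infer.h"

using baidu::paddle_serving::predictor::InferManager;

// Sketch only: feed through an input handle, run the named engine, then read
// the result back through the matching output handle.
bool RunOnce(const std::string& engine_name,
             const std::vector<float>& input,
             const std::vector<int>& input_shape,
             std::vector<float>* output) {
  auto& mgr = InferManager::instance();
  std::vector<std::string> feeds = mgr.GetInputNames(engine_name.c_str());
  auto in_handle = mgr.GetInputHandle(engine_name.c_str(), feeds[0]);
  in_handle->Reshape(input_shape);
  in_handle->CopyFromCpu(input.data());
  if (mgr.infer(engine_name.c_str()) != 0) {  // new signature: model name only
    return false;
  }
  std::vector<std::string> fetches = mgr.GetOutputNames(engine_name.c_str());
  auto out_handle = mgr.GetOutputHandle(engine_name.c_str(), fetches[0]);
  std::vector<int> shape = out_handle->shape();
  int capacity = 1;
  for (int d : shape) capacity *= d;
  output->resize(capacity);
  out_handle->CopyToCpu(output->data());
  return true;
}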
paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
浏览文件 @
fcd66ef4
...
...
@@ -28,8 +28,6 @@ namespace baidu {
namespace
paddle_serving
{
namespace
fluid_cpu
{
using
configure
::
SigmoidConf
;
class
AutoLock
{
public:
explicit
AutoLock
(
pthread_mutex_t
&
mutex
)
:
_mut
(
mutex
)
{
...
...
@@ -57,31 +55,36 @@ class GlobalPaddleCreateMutex {
pthread_mutex_t
_mut
;
};
class
GlobalSigmoidCreateMutex
{
public:
pthread_mutex_t
&
mutex
()
{
return
_mut
;
}
static
pthread_mutex_t
&
instance
()
{
static
GlobalSigmoidCreateMutex
gmutex
;
return
gmutex
.
mutex
();
}
private:
GlobalSigmoidCreateMutex
()
{
pthread_mutex_init
(
&
_mut
,
NULL
);
}
pthread_mutex_t
_mut
;
};
using
paddle_infer
::
Config
;
using
paddle_infer
::
Predictor
;
using
paddle_infer
::
Tensor
;
using
paddle_infer
::
CreatePredictor
;
// data interface
class
FluidFamilyCore
{
public:
virtual
~
FluidFamilyCore
()
{}
virtual
bool
Run
(
const
void
*
in_data
,
void
*
out_data
)
{
if
(
!
_core
->
Run
(
*
(
std
::
vector
<
paddle
::
PaddleTensor
>*
)
in_data
,
(
std
::
vector
<
paddle
::
PaddleTensor
>*
)
out_data
))
{
virtual
std
::
vector
<
std
::
string
>
GetInputNames
()
{
return
_core
->
GetInputNames
();
}
virtual
std
::
unique_ptr
<
Tensor
>
GetInputHandle
(
const
std
::
string
&
name
)
{
return
_core
->
GetInputHandle
(
name
);
}
virtual
std
::
vector
<
std
::
string
>
GetOutputNames
()
{
return
_core
->
GetOutputNames
();
}
virtual
std
::
unique_ptr
<
Tensor
>
GetOutputHandle
(
const
std
::
string
&
name
)
{
return
_core
->
GetOutputHandle
(
name
);
}
virtual
bool
Run
()
{
if
(
!
_core
->
Run
())
{
LOG
(
ERROR
)
<<
"Failed call Run with paddle predictor"
;
return
false
;
}
return
true
;
}
...
...
@@ -92,8 +95,8 @@ class FluidFamilyCore {
LOG
(
ERROR
)
<<
"origin paddle Predictor is null."
;
return
-
1
;
}
paddle
::
Paddle
Predictor
*
p_predictor
=
(
        paddle::PaddlePredictor*)origin_core;
    Predictor* p_predictor = (Predictor*)origin_core;
    _core = p_predictor->Clone();
    if (_core.get() == NULL) {
      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
...
...
@@ -105,7 +108,7 @@ class FluidFamilyCore {
  virtual void* get() { return _core.get(); }

 protected:
  std::unique_ptr<paddle::PaddlePredictor> _core;
  std::shared_ptr<Predictor> _core;
};

// infer interface
...
...
@@ -119,51 +122,19 @@ class FluidCpuAnalysisCore : public FluidFamilyCore {
      return -1;
    }

    paddle::AnalysisConfig analysis_config;
    analysis_config.SetParamsFile(data_path + "/__params__");
    analysis_config.SetProgFile(data_path + "/__model__");
    analysis_config.DisableGpu();
    analysis_config.SetCpuMathLibraryNumThreads(1);
    Config config;
    config.SetParamsFile(data_path + "/__params__");
    config.SetProgFile(data_path + "/__model__");
    config.DisableGpu();
    config.SetCpuMathLibraryNumThreads(1);

    if (params.enable_memory_optimization()) {
      analysis_config.EnableMemoryOptim();
      config.EnableMemoryOptim();
    }

    analysis_config.SwitchSpecifyInputNames(true);
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

class FluidCpuNativeCore : public FluidFamilyCore {
 public:
  int create(const predictor::InferEngineCreationParams& params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: " << data_path;
      return -1;
    }

    paddle::NativeConfig native_config;
    native_config.param_file = data_path + "/__params__";
    native_config.prog_file = data_path + "/__model__";
    native_config.use_gpu = false;
    native_config.device = 0;
    native_config.fraction_of_gpu_memory = 0;
    config.SwitchSpecifyInputNames(true);
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = paddle::CreatePaddlePredictor<paddle::NativeConfig, paddle::PaddleEngineKind::kNative>(native_config);
    _core = CreatePredictor(config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
...
...
@@ -184,54 +155,24 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore {
      return -1;
    }

    paddle::AnalysisConfig analysis_config;
    analysis_config.SetModel(data_path);
    analysis_config.DisableGpu();
    analysis_config.SwitchSpecifyInputNames(true);
    analysis_config.SetCpuMathLibraryNumThreads(1);
    Config config;
    config.SetModel(data_path);
    config.DisableGpu();
    config.SwitchSpecifyInputNames(true);
    config.SetCpuMathLibraryNumThreads(1);

    if (params.enable_memory_optimization()) {
      analysis_config.EnableMemoryOptim();
      config.EnableMemoryOptim();
    }

    if (params.enable_ir_optimization()) {
      analysis_config.SwitchIrOptim(true);
      config.SwitchIrOptim(true);
    } else {
      analysis_config.SwitchIrOptim(false);
      config.SwitchIrOptim(false);
    }

    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

class FluidCpuNativeDirCore : public FluidFamilyCore {
 public:
  int create(const predictor::InferEngineCreationParams& params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: " << data_path;
      return -1;
    }

    paddle::NativeConfig native_config;
    native_config.model_dir = data_path;
    native_config.use_gpu = false;
    native_config.device = 0;
    native_config.fraction_of_gpu_memory = 0;
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = paddle::CreatePaddlePredictor<paddle::NativeConfig, paddle::PaddleEngineKind::kNative>(native_config);
    _core = CreatePredictor(config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
...
...
@@ -323,214 +264,6 @@ class Parameter {
  float* _params;
};

class SigmoidModel {
 public:
  ~SigmoidModel() {}
  int load(const char* sigmoid_w_file,
           const char* sigmoid_b_file,
           float exp_max,
           float exp_min) {
    AutoLock lock(GlobalSigmoidCreateMutex::instance());
    if (0 != _sigmoid_w.init(2, 1, sigmoid_w_file) || 0 != _sigmoid_w.load()) {
      LOG(ERROR) << "load params sigmoid_w failed.";
      return -1;
    }
    VLOG(2) << "load sigmoid_w [" << _sigmoid_w._params[0] << "] ["
            << _sigmoid_w._params[1] << "].";
    if (0 != _sigmoid_b.init(2, 1, sigmoid_b_file) || 0 != _sigmoid_b.load()) {
      LOG(ERROR) << "load params sigmoid_b failed.";
      return -1;
    }
    VLOG(2) << "load sigmoid_b [" << _sigmoid_b._params[0] << "] ["
            << _sigmoid_b._params[1] << "].";
    _exp_max_input = exp_max;
    _exp_min_input = exp_min;
    return 0;
  }

  int softmax(float x, double& o) {  // NOLINT
    float _y0 = x * _sigmoid_w._params[0] + _sigmoid_b._params[0];
    float _y1 = x * _sigmoid_w._params[1] + _sigmoid_b._params[1];
    _y0 = (_y0 > _exp_max_input) ? _exp_max_input
                                 : ((_y0 < _exp_min_input) ? _exp_min_input : _y0);
    _y1 = (_y1 > _exp_max_input) ? _exp_max_input
                                 : ((_y1 < _exp_min_input) ? _exp_min_input : _y1);
    o = 1.0f / (1.0f + exp(_y0 - _y1));
    return 0;
  }

 public:
  Parameter _sigmoid_w;
  Parameter _sigmoid_b;
  float _exp_max_input;
  float _exp_min_input;
};
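Note: SigmoidModel::softmax above is a clamped two-logit softmax, o = 1 / (1 + exp(y0 - y1)) with y_i = x * w_i + b_i and both logits clipped to [exp_min, exp_max]. A self-contained sketch of the same arithmetic follows; the weight, bias, and clamp values are made-up placeholders, not values from any shipped sigmoid_w/sigmoid_b file:

#include <cmath>
#include <cstdio>

int main() {
  // Placeholder values standing in for _sigmoid_w._params / _sigmoid_b._params.
  const float w[2] = {0.5f, -0.5f};
  const float b[2] = {0.1f, 0.2f};
  const float exp_max = 13.0f, exp_min = -10.0f;  // illustrative clamp range
  const float x = 1.5f;                           // raw model score

  float y0 = x * w[0] + b[0];
  float y1 = x * w[1] + b[1];
  // Clamp both logits, as SigmoidModel::softmax does with _exp_max/_exp_min_input.
  y0 = (y0 > exp_max) ? exp_max : ((y0 < exp_min) ? exp_min : y0);
  y1 = (y1 > exp_max) ? exp_max : ((y1 < exp_min) ? exp_min : y1);
  // Two-class softmax over (y0, y1), reported as the probability of class 1.
  const double o = 1.0 / (1.0 + std::exp(y0 - y1));
  std::printf("score=%.2f -> prob=%.4f\n", x, o);
  return 0;
}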
class SigmoidFluidModel {
 public:
  int softmax(float x, double& o) {  // NOLINT
    return _sigmoid_core->softmax(x, o);
  }  // NOLINT

  std::unique_ptr<SigmoidFluidModel> Clone() {
    std::unique_ptr<SigmoidFluidModel> clone_model;
    clone_model.reset(new SigmoidFluidModel());
    clone_model->_sigmoid_core = _sigmoid_core;
    clone_model->_fluid_core = _fluid_core->Clone();
    return std::move(clone_model);  // NOLINT
  }

 public:
  std::unique_ptr<paddle::PaddlePredictor> _fluid_core;
  std::shared_ptr<SigmoidModel> _sigmoid_core;
};

class FluidCpuWithSigmoidCore : public FluidFamilyCore {
 public:
  virtual ~FluidCpuWithSigmoidCore() {}

 public:
  int create(const predictor::InferEngineCreationParams& params) {
    std::string model_path = params.get_path();
    size_t pos = model_path.find_last_of("/\\");
    std::string conf_path = model_path.substr(0, pos);
    std::string conf_file = model_path.substr(pos);
    configure::SigmoidConf conf;
    if (configure::read_proto_conf(conf_path, conf_file, &conf) != 0) {
      LOG(ERROR) << "failed load model path: " << model_path;
      return -1;
    }

    _core.reset(new SigmoidFluidModel);

    std::string fluid_model_data_path = conf.dnn_model_path();
    predictor::InferEngineCreationParams new_params(params);
    new_params.set_path(fluid_model_data_path);
    int ret = load_fluid_model(new_params);
    if (ret < 0) {
      LOG(ERROR) << "fail to load fluid model.";
      return -1;
    }
    const char* sigmoid_w_file = conf.sigmoid_w_file().c_str();
    const char* sigmoid_b_file = conf.sigmoid_b_file().c_str();
    float exp_max = conf.exp_max_input();
    float exp_min = conf.exp_min_input();
    _core->_sigmoid_core.reset(new SigmoidModel);
    VLOG(2) << "create sigmoid core[" << _core->_sigmoid_core.get()
            << "], use count[" << _core->_sigmoid_core.use_count() << "].";
    ret = _core->_sigmoid_core->load(sigmoid_w_file, sigmoid_b_file, exp_max, exp_min);
    if (ret < 0) {
      LOG(ERROR) << "fail to load sigmoid model.";
      return -1;
    }
    return 0;
  }

  virtual bool Run(const void* in_data, void* out_data) {
    if (!_core->_fluid_core->Run(*(std::vector<paddle::PaddleTensor>*)in_data,
                                 (std::vector<paddle::PaddleTensor>*)out_data)) {
      LOG(ERROR) << "Failed call Run with paddle predictor";
      return false;
    }
    return true;
  }

  virtual int clone(SigmoidFluidModel* origin_core) {
    if (origin_core == NULL) {
      LOG(ERROR) << "origin paddle Predictor is null.";
      return -1;
    }
    _core = origin_core->Clone();
    if (_core.get() == NULL) {
      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
      return -1;
    }
    VLOG(2) << "clone sigmoid core[" << _core->_sigmoid_core.get()
            << "] use count[" << _core->_sigmoid_core.use_count() << "].";
    return 0;
  }

  virtual SigmoidFluidModel* get() { return _core.get(); }

  virtual int load_fluid_model(const predictor::InferEngineCreationParams& params) = 0;

  int softmax(float x, double& o) {  // NOLINT
    return _core->_sigmoid_core->softmax(x, o);
  }

 protected:
  std::unique_ptr<SigmoidFluidModel> _core;  // NOLINT
};

class FluidCpuNativeDirWithSigmoidCore : public FluidCpuWithSigmoidCore {
 public:
  int load_fluid_model(const predictor::InferEngineCreationParams& params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: " << data_path;
      return -1;
    }

    paddle::NativeConfig native_config;
    native_config.model_dir = data_path;
    native_config.use_gpu = false;
    native_config.device = 0;
    native_config.fraction_of_gpu_memory = 0;
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core->_fluid_core = paddle::CreatePaddlePredictor<paddle::NativeConfig, paddle::PaddleEngineKind::kNative>(native_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

class FluidCpuAnalysisDirWithSigmoidCore : public FluidCpuWithSigmoidCore {
 public:
  int load_fluid_model(const predictor::InferEngineCreationParams& params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: " << data_path;
      return -1;
    }

    paddle::AnalysisConfig analysis_config;
    analysis_config.SetModel(data_path);
    analysis_config.DisableGpu();
    analysis_config.SwitchSpecifyInputNames(true);
    analysis_config.SetCpuMathLibraryNumThreads(1);
    if (params.enable_memory_optimization()) {
      analysis_config.EnableMemoryOptim();
    }
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core->_fluid_core = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

}  // namespace fluid_cpu
}  // namespace paddle_serving
}  // namespace baidu
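For context on the API these cores are migrating to: with paddle_infer, inference is driven through named input/output handles rather than a std::vector<paddle::PaddleTensor>. A minimal standalone sketch of that call pattern, assuming a CPU model directory; the model path and the 13-float input shape below are placeholders, not part of this commit:

#include <vector>
#include "paddle_inference_api.h"  // Paddle Inference C++ API header; install path may vary

int run_once() {
  paddle_infer::Config config;
  config.SetModel("./uci_housing_model");  // placeholder model directory
  config.DisableGpu();
  config.SwitchSpecifyInputNames(true);

  auto predictor = paddle_infer::CreatePredictor(config);

  // Feed the first input; a 1x13 float tensor is purely illustrative.
  std::vector<float> input(13, 0.0f);
  auto in_names = predictor->GetInputNames();
  auto in_tensor = predictor->GetInputHandle(in_names[0]);
  in_tensor->Reshape({1, 13});
  in_tensor->CopyFromCpu(input.data());

  if (!predictor->Run()) return -1;

  // Pull the first output back to host memory.
  auto out_names = predictor->GetOutputNames();
  auto out_tensor = predictor->GetOutputHandle(out_names[0]);
  std::vector<int> shape = out_tensor->shape();
  int numel = 1;
  for (int d : shape) numel *= d;
  std::vector<float> output(numel);
  out_tensor->CopyToCpu(output.data());
  return 0;
}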
paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp  View file @ fcd66ef4
...
...
@@ -30,28 +30,6 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_CPU_ANALYSIS_DIR");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<
        FluidCpuAnalysisDirWithSigmoidCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_CPU_ANALYSIS_DIR_SIGMOID");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidCpuNativeCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_CPU_NATIVE");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidCpuNativeDirCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_CPU_NATIVE_DIR");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<
        FluidCpuNativeDirWithSigmoidCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_CPU_NATIVE_DIR_SIGMOID");

}  // namespace fluid_cpu
}  // namespace paddle_serving
}  // namespace baidu
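These registrations bind the engine `type` string from a serving configuration to a concrete FluidInferEngine instantiation, so after this change only the names still registered above can be resolved. Purely as an illustration, an additional core would be wired in the same way; FluidCpuCustomCore and the "FLUID_CPU_CUSTOM" name below are hypothetical, not part of this commit:

// Hypothetical example: registering one more CPU core under a new type name.
// FluidCpuCustomCore would have to implement the same interface as the cores above.
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidCpuCustomCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_CPU_CUSTOM");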
paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h  View file @ fcd66ef4
...
...
@@ -61,31 +61,36 @@ class GlobalPaddleCreateMutex {
  pthread_mutex_t _mut;
};

class GlobalSigmoidCreateMutex {
 public:
  pthread_mutex_t& mutex() { return _mut; }
  static pthread_mutex_t& instance() {
    static GlobalSigmoidCreateMutex gmutex;
    return gmutex.mutex();
  }

 private:
  GlobalSigmoidCreateMutex() { pthread_mutex_init(&_mut, NULL); }
  pthread_mutex_t _mut;
};

using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;

// data interface
class FluidFamilyCore {
 public:
  virtual ~FluidFamilyCore() {}
  virtual bool Run(const void* in_data, void* out_data) {
    if (!_core->Run(*(std::vector<paddle::PaddleTensor>*)in_data,
                    (std::vector<paddle::PaddleTensor>*)out_data)) {
  virtual std::vector<std::string> GetInputNames() {
    return _core->GetInputNames();
  }
  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
    return _core->GetInputHandle(name);
  }
  virtual std::vector<std::string> GetOutputNames() {
    return _core->GetOutputNames();
  }
  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
    return _core->GetOutputHandle(name);
  }
  virtual bool Run() {
    if (!_core->Run()) {
      LOG(ERROR) << "Failed call Run with paddle predictor";
      return false;
    }
    return true;
  }
...
...
@@ -96,8 +101,8 @@ class FluidFamilyCore {
      LOG(ERROR) << "origin paddle Predictor is null.";
      return -1;
    }
    paddle::PaddlePredictor* p_predictor = (paddle::PaddlePredictor*)origin_core;
    Predictor* p_predictor = (Predictor*)origin_core;
    _core = p_predictor->Clone();
    if (_core.get() == NULL) {
      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
...
...
@@ -109,7 +114,7 @@ class FluidFamilyCore {
  virtual void* get() { return _core.get(); }

 protected:
  std::unique_ptr<paddle::PaddlePredictor> _core;
  std::shared_ptr<Predictor> _core;
};

// infer interface
...
...
@@ -123,51 +128,19 @@ class FluidGpuAnalysisCore : public FluidFamilyCore {
      return -1;
    }

    paddle::AnalysisConfig analysis_config;
    analysis_config.SetParamsFile(data_path + "/__params__");
    analysis_config.SetProgFile(data_path + "/__model__");
    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
    analysis_config.SetCpuMathLibraryNumThreads(1);
    Config config;
    config.SetParamsFile(data_path + "/__params__");
    config.SetProgFile(data_path + "/__model__");
    config.EnableUseGpu(100, FLAGS_gpuid);
    config.SetCpuMathLibraryNumThreads(1);

    if (params.enable_memory_optimization()) {
      analysis_config.EnableMemoryOptim();
      config.EnableMemoryOptim();
    }

    analysis_config.SwitchSpecifyInputNames(true);
    config.SwitchSpecifyInputNames(true);
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

class FluidGpuNativeCore : public FluidFamilyCore {
 public:
  int create(const predictor::InferEngineCreationParams& params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: " << data_path;
      return -1;
    }

    paddle::NativeConfig native_config;
    native_config.param_file = data_path + "/__params__";
    native_config.prog_file = data_path + "/__model__";
    native_config.use_gpu = true;
    native_config.fraction_of_gpu_memory = 0.01;
    native_config.device = FLAGS_gpuid;
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = paddle::CreatePaddlePredictor<paddle::NativeConfig, paddle::PaddleEngineKind::kNative>(native_config);
    _core = CreatePredictor(config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
...
...
@@ -188,81 +161,39 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
      return -1;
    }

    paddle::AnalysisConfig analysis_config;
    analysis_config.SetModel(data_path);
    analysis_config.EnableUseGpu(1500, FLAGS_gpuid);
    analysis_config.SwitchSpecifyInputNames(true);
    analysis_config.SetCpuMathLibraryNumThreads(1);
    Config config;
    config.SetModel(data_path);
    config.EnableUseGpu(1500, FLAGS_gpuid);
    config.SwitchSpecifyInputNames(true);
    config.SetCpuMathLibraryNumThreads(1);

    if (params.enable_memory_optimization()) {
      analysis_config.EnableMemoryOptim();
      config.EnableMemoryOptim();
    }

#if 0  // todo: support flexible shape
    int min_seq_len = 1;
    int max_seq_len = 512;
    int opt_seq_len = 128;
    int head_number = 12;
    int batch = 50;
    std::vector<int> min_in_shape = {batch, min_seq_len, 1};
    std::vector<int> max_in_shape = {batch, max_seq_len, 1};
    std::vector<int> opt_in_shape = {batch, opt_seq_len, 1};
    std::string input1_name = "src_text_a_ids";
    std::string input2_name = "pos_text_a_ids";
    std::string input3_name = "sent_text_a_ids";
    std::string input4_name = "stack_0.tmp_0";
    std::map<std::string, std::vector<int>> min_input_shape = {
        {input1_name, min_in_shape},
        {input2_name, min_in_shape},
        {input3_name, min_in_shape},
        {input4_name, {batch, head_number, min_seq_len, min_seq_len}},
    };
    std::map<std::string, std::vector<int>> max_input_shape = {
        {input1_name, max_in_shape},
        {input2_name, max_in_shape},
        {input3_name, max_in_shape},
        {input4_name, {batch, head_number, max_seq_len, max_seq_len}},
    };
    std::map<std::string, std::vector<int>> opt_input_shape = {
        {input1_name, opt_in_shape},
        {input2_name, opt_in_shape},
        {input3_name, opt_in_shape},
        {input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
    };
    analysis_config.SetTRTDynamicShapeInfo(
        min_input_shape, max_input_shape, opt_input_shape);
#endif
    int max_batch = 32;
    int min_subgraph_size = 3;
    if (params.use_trt()) {
      analysis_config.EnableTensorRtEngine(
      config.EnableTensorRtEngine(
          1 << 20,
          max_batch,
          min_subgraph_size,
          paddle::AnalysisConfig::Precision::kFloat32,
          Config::Precision::kFloat32,
          false,
          false);
      LOG(INFO) << "create TensorRT predictor";
    } else {
      if (params.enable_memory_optimization()) {
        analysis_config.EnableMemoryOptim();
        config.EnableMemoryOptim();
      }

      if (params.enable_ir_optimization()) {
        analysis_config.SwitchIrOptim(true);
        config.SwitchIrOptim(true);
      } else {
        analysis_config.SwitchIrOptim(false);
        config.SwitchIrOptim(false);
      }
    }
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
    _core = CreatePredictor(config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
...
...
@@ -273,34 +204,6 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
  }
};
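The disabled `#if 0` block above hints at TensorRT dynamic-shape support. A sketch of what that could look like against the new paddle_infer::Config API follows; the tensor name is taken from that block, but the shape ranges are illustrative only and not values used by Serving:

#include <map>
#include <string>
#include <vector>
#include "paddle_inference_api.h"

void enable_trt_dynamic_shape(paddle_infer::Config* config) {
  const int max_batch = 32;
  const int min_subgraph_size = 3;
  config->EnableTensorRtEngine(1 << 20,
                               max_batch,
                               min_subgraph_size,
                               paddle_infer::Config::Precision::kFloat32,
                               false,   // use_static
                               false);  // use_calib_mode

  // Illustrative min/max/optimal shape ranges for one input tensor.
  std::map<std::string, std::vector<int>> min_shape = {{"src_text_a_ids", {1, 1, 1}}};
  std::map<std::string, std::vector<int>> max_shape = {{"src_text_a_ids", {32, 512, 1}}};
  std::map<std::string, std::vector<int>> opt_shape = {{"src_text_a_ids", {32, 128, 1}}};
  config->SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
}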
class FluidGpuNativeDirCore : public FluidFamilyCore {
 public:
  int create(const predictor::InferEngineCreationParams& params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: " << data_path;
      return -1;
    }

    paddle::NativeConfig native_config;
    native_config.model_dir = data_path;
    native_config.use_gpu = true;
    native_config.fraction_of_gpu_memory = 0.01;
    native_config.device = FLAGS_gpuid;
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = paddle::CreatePaddlePredictor<paddle::NativeConfig, paddle::PaddleEngineKind::kNative>(native_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

class Parameter {
 public:
...
...
@@ -383,214 +286,6 @@ class Parameter {
  float* _params;
};

class SigmoidModel {
 public:
  ~SigmoidModel() {}
  int load(const char* sigmoid_w_file,
           const char* sigmoid_b_file,
           float exp_max,
           float exp_min) {
    AutoLock lock(GlobalSigmoidCreateMutex::instance());
    if (0 != _sigmoid_w.init(2, 1, sigmoid_w_file) || 0 != _sigmoid_w.load()) {
      LOG(ERROR) << "load params sigmoid_w failed.";
      return -1;
    }
    VLOG(2) << "load sigmoid_w [" << _sigmoid_w._params[0] << "] ["
            << _sigmoid_w._params[1] << "].";
    if (0 != _sigmoid_b.init(2, 1, sigmoid_b_file) || 0 != _sigmoid_b.load()) {
      LOG(ERROR) << "load params sigmoid_b failed.";
      return -1;
    }
    VLOG(2) << "load sigmoid_b [" << _sigmoid_b._params[0] << "] ["
            << _sigmoid_b._params[1] << "].";
    _exp_max_input = exp_max;
    _exp_min_input = exp_min;
    return 0;
  }

  int softmax(float x, double& o) {  // NOLINT
    float _y0 = x * _sigmoid_w._params[0] + _sigmoid_b._params[0];
    float _y1 = x * _sigmoid_w._params[1] + _sigmoid_b._params[1];
    _y0 = (_y0 > _exp_max_input) ? _exp_max_input
                                 : ((_y0 < _exp_min_input) ? _exp_min_input : _y0);
    _y1 = (_y1 > _exp_max_input) ? _exp_max_input
                                 : ((_y1 < _exp_min_input) ? _exp_min_input : _y1);
    o = 1.0f / (1.0f + exp(_y0 - _y1));
    return 0;
  }

 public:
  Parameter _sigmoid_w;
  Parameter _sigmoid_b;
  float _exp_max_input;
  float _exp_min_input;
};

class SigmoidFluidModel {
 public:
  int softmax(float x, double& o) {  // NOLINT
    return _sigmoid_core->softmax(x, o);
  }  // NOLINT

  std::unique_ptr<SigmoidFluidModel> Clone() {
    std::unique_ptr<SigmoidFluidModel> clone_model;
    clone_model.reset(new SigmoidFluidModel());
    clone_model->_sigmoid_core = _sigmoid_core;
    clone_model->_fluid_core = _fluid_core->Clone();
    return std::move(clone_model);
  }

 public:
  std::unique_ptr<paddle::PaddlePredictor> _fluid_core;
  std::shared_ptr<SigmoidModel> _sigmoid_core;
};

class FluidGpuWithSigmoidCore : public FluidFamilyCore {
 public:
  virtual ~FluidGpuWithSigmoidCore() {}

 public:
  int create(const predictor::InferEngineCreationParams& params) {
    std::string model_path = params.get_path();
    size_t pos = model_path.find_last_of("/\\");
    std::string conf_path = model_path.substr(0, pos);
    std::string conf_file = model_path.substr(pos);
    configure::SigmoidConf conf;
    if (configure::read_proto_conf(conf_path, conf_file, &conf) != 0) {
      LOG(ERROR) << "failed load model path: " << model_path;
      return -1;
    }

    _core.reset(new SigmoidFluidModel);

    std::string fluid_model_data_path = conf.dnn_model_path();
    predictor::InferEngineCreationParams new_params(params);
    new_params.set_path(fluid_model_data_path);
    int ret = load_fluid_model(new_params);
    if (ret < 0) {
      LOG(ERROR) << "fail to load fluid model.";
      return -1;
    }
    const char* sigmoid_w_file = conf.sigmoid_w_file().c_str();
    const char* sigmoid_b_file = conf.sigmoid_b_file().c_str();
    float exp_max = conf.exp_max_input();
    float exp_min = conf.exp_min_input();
    _core->_sigmoid_core.reset(new SigmoidModel);
    LOG(INFO) << "create sigmoid core[" << _core->_sigmoid_core.get()
              << "], use count[" << _core->_sigmoid_core.use_count() << "].";
    ret = _core->_sigmoid_core->load(sigmoid_w_file, sigmoid_b_file, exp_max, exp_min);
    if (ret < 0) {
      LOG(ERROR) << "fail to load sigmoid model.";
      return -1;
    }
    return 0;
  }

  virtual bool Run(const void* in_data, void* out_data) {
    if (!_core->_fluid_core->Run(*(std::vector<paddle::PaddleTensor>*)in_data,
                                 (std::vector<paddle::PaddleTensor>*)out_data)) {
      LOG(ERROR) << "Failed call Run with paddle predictor";
      return false;
    }
    return true;
  }

  virtual int clone(SigmoidFluidModel* origin_core) {
    if (origin_core == NULL) {
      LOG(ERROR) << "origin paddle Predictor is null.";
      return -1;
    }
    _core = origin_core->Clone();
    if (_core.get() == NULL) {
      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
      return -1;
    }
    LOG(INFO) << "clone sigmoid core[" << _core->_sigmoid_core.get()
              << "] use count[" << _core->_sigmoid_core.use_count() << "].";
    return 0;
  }

  virtual SigmoidFluidModel* get() { return _core.get(); }

  virtual int load_fluid_model(const predictor::InferEngineCreationParams& params) = 0;

  int softmax(float x, double& o) {  // NOLINT
    return _core->_sigmoid_core->softmax(x, o);
  }

 protected:
  std::unique_ptr<SigmoidFluidModel> _core;
};

class FluidGpuNativeDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
 public:
  int load_fluid_model(const predictor::InferEngineCreationParams& params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: " << data_path;
      return -1;
    }

    paddle::NativeConfig native_config;
    native_config.model_dir = data_path;
    native_config.use_gpu = true;
    native_config.fraction_of_gpu_memory = 0.01;
    native_config.device = FLAGS_gpuid;
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core->_fluid_core = paddle::CreatePaddlePredictor<paddle::NativeConfig, paddle::PaddleEngineKind::kNative>(native_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

class FluidGpuAnalysisDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
 public:
  int load_fluid_model(const predictor::InferEngineCreationParams& params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: " << data_path;
      return -1;
    }

    paddle::AnalysisConfig analysis_config;
    analysis_config.SetModel(data_path);
    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
    analysis_config.SwitchSpecifyInputNames(true);
    analysis_config.SetCpuMathLibraryNumThreads(1);
    if (params.enable_memory_optimization()) {
      analysis_config.EnableMemoryOptim();
    }
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core->_fluid_core = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

}  // namespace fluid_gpu
}  // namespace paddle_serving
}  // namespace baidu
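The clone() path above now goes through paddle_infer::Predictor::Clone() instead of paddle::PaddlePredictor::Clone(). A minimal sketch of the per-worker pattern this enables, assuming one predictor built from a Config and one clone handed to each serving thread so they do not share execution state:

#include <memory>
#include <vector>
#include "paddle_inference_api.h"

// Build one predictor, then give every worker its own clone; model weights
// stay shared underneath while per-thread execution state is separated.
std::vector<std::shared_ptr<paddle_infer::Predictor>> make_worker_predictors(
    const paddle_infer::Config& config, int num_workers) {
  auto main_predictor = paddle_infer::CreatePredictor(config);
  std::vector<std::shared_ptr<paddle_infer::Predictor>> workers;
  for (int i = 0; i < num_workers; ++i) {
    workers.emplace_back(main_predictor->Clone());
  }
  return workers;
}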
paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp  View file @ fcd66ef4
...
...
@@ -32,28 +32,6 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_GPU_ANALYSIS_DIR");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<
        FluidGpuAnalysisDirWithSigmoidCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_GPU_ANALYSIS_DIR_SIGMOID");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuNativeCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_GPU_NATIVE");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuNativeDirCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_GPU_NATIVE_DIR");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<
        FluidGpuNativeDirWithSigmoidCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_GPU_NATIVE_DIR_SIGMOID");

}  // namespace fluid_gpu
}  // namespace paddle_serving
}  // namespace baidu
python/examples/fit_a_line/local_train.py  View file @ fcd66ef4
...
...
@@ -16,7 +16,7 @@
import sys
import paddle
import paddle.fluid as fluid
paddle.enable_static()

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.uci_housing.train(), buf_size=500),
...
...