PaddlePaddle / Serving
Commit 35e2ca31
Authored December 30, 2020 by wangjiawei04
fix op and gpu
Parent: ed3f22b4
Showing 7 changed files with 280 additions and 373 deletions (+280 -373)
core/general-server/op/general_infer_op.cpp                        +13  -0
core/general-server/op/general_reader_op.cpp                       +86  -0
core/general-server/op/general_response_op.cpp                     +49  -0
core/predictor/framework/infer.h                                   +78  -0
paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h   +9   -1
paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h   +45  -350
paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp     +0   -22
core/general-server/op/general_infer_op.cpp
@@ -36,6 +36,19 @@ using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;

int GeneralInferOp::inference() {
  VLOG(2) << "Going to run inference";
  const std::vector<std::string> pre_node_names = pre_names();
  if (pre_node_names.size() != 1) {
    LOG(ERROR) << "This op(" << op_name()
               << ") can only have one predecessor op, but received "
               << pre_node_names.size();
    return -1;
  }
  if (InferManager::instance().infer(engine_name().c_str())) {
    return -1;
  }
  std::cout << "Infer Success" << std::endl;
  return 0;
}
DEFINE_OP(GeneralInferOp);
core/general-server/op/general_reader_op.cpp
@@ -20,6 +20,7 @@
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "core/util/include/timer.h"

namespace baidu {

@@ -32,6 +33,7 @@ using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
using baidu::paddle_serving::predictor::InferManager;

int conf_check(const Request *req,
               const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {

@@ -71,6 +73,90 @@ int conf_check(const Request *req,
int GeneralReaderOp::inference() {
  // reade request from client
  // TODO: only support one engine here
  std::string engine_name = "general_infer_0";
  const Request *req = dynamic_cast<const Request *>(get_request_message());
  uint64_t log_id = req->log_id();
  int input_var_num = 0;
  std::vector<int64_t> elem_type;
  std::vector<int64_t> elem_size;
  std::vector<int64_t> capacity;
  int var_num = req->insts(0).tensor_array_size();
  baidu::paddle_serving::predictor::Resource &resource =
      baidu::paddle_serving::predictor::Resource::instance();
  std::shared_ptr<PaddleGeneralModelConfig> model_config =
      resource.get_general_model_config();
  elem_type.resize(var_num);
  elem_size.resize(var_num);
  capacity.resize(var_num);
  for (int i = 0; i < var_num; ++i) {
    std::string tensor_name = model_config->_feed_name[i];
    std::cout << "START Tensor Name: " << tensor_name << std::endl;
    auto lod_tensor = InferManager::instance().GetInputHandle(
        engine_name.c_str(), tensor_name.c_str());
    std::cout << "PICK lod tensor. " << std::endl;
    std::vector<std::vector<size_t>> lod;
    std::vector<int> shape;
    // get lod info here
    if (req->insts(0).tensor_array(i).lod_size() > 0) {
      lod.resize(1);
      for (int k = 0; k < req->insts(0).tensor_array(i).lod_size(); ++k) {
        lod[0].push_back(req->insts(0).tensor_array(i).lod(k));
      }
      capacity[i] = 1;
      for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
        int dim = req->insts(0).tensor_array(i).shape(k);
        VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
                << "]: " << dim;
        capacity[i] *= dim;
        shape.push_back(dim);
      }
      VLOG(2) << "(logid=" << log_id << ") var[" << i
              << "] is tensor, capacity: " << capacity[i];
    } else {
      capacity[i] = 1;
      for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
        int dim = req->insts(0).tensor_array(i).shape(k);
        VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
                << "]: " << dim;
        capacity[i] *= dim;
        shape.push_back(dim);
      }
      VLOG(2) << "(logid=" << log_id << ") var[" << i
              << "] is tensor, capacity: " << capacity[i];
    }
    lod_tensor->SetLoD(lod);
    lod_tensor->Reshape(shape);
    std::cout << "FINI Set Lod and Reshape, and elem type: " << elem_type[i]
              << std::endl;
    // insert data here
    if (req->insts(0).tensor_array(i).elem_type() == 0) {
      // TODO: Copy twice here, can optimize
      int elem_num = req->insts(0).tensor_array(i).int64_data_size();
      std::vector<int64_t> data(elem_num);
      int64_t *dst_ptr = data.data();
      for (int k = 0; k < elem_num; ++k) {
        dst_ptr[k] = req->insts(0).tensor_array(i).int64_data(k);
      }
      lod_tensor->CopyFromCpu(dst_ptr);
    } else if (req->insts(0).tensor_array(i).elem_type() == 1) {
      int elem_num = req->insts(0).tensor_array(i).float_data_size();
      std::vector<float> data(elem_num);
      float *dst_ptr = data.data();
      for (int k = 0; k < elem_num; ++k) {
        dst_ptr[k] = req->insts(0).tensor_array(i).float_data(k);
      }
      lod_tensor->CopyFromCpu(dst_ptr);
    } else if (req->insts(0).tensor_array(i).elem_type() == 2) {
      int elem_num = req->insts(0).tensor_array(i).int_data_size();
      std::vector<int32_t> data(elem_num);
      int32_t *dst_ptr = data.data();
      for (int k = 0; k < elem_num; ++k) {
        dst_ptr[k] = req->insts(0).tensor_array(i).int_data(k);
      }
      lod_tensor->CopyFromCpu(dst_ptr);
    }
    std::cout << "FINISH Tensor Name: " << tensor_name << std::endl;
  }
  return 0;
}
DEFINE_OP(GeneralReaderOp);
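The feed path above runs through Serving's InferManager, but it mirrors the plain Paddle Inference 2.0 handle API. The following is a minimal standalone sketch of that feed pattern, not part of the commit; the model directory, input shape, and data values are hypothetical placeholders, and it assumes the paddle_inference_api.h header from a Paddle Inference 2.0 build.

#include <vector>
#include "paddle_inference_api.h"  // paddle_infer::Config, Predictor, Tensor

// Sketch only: feed one float input through the handle-based API that
// GeneralReaderOp now uses via InferManager.
void feed_example() {
  paddle_infer::Config config;
  config.SetModel("./uci_housing_model");  // hypothetical model directory
  auto predictor = paddle_infer::CreatePredictor(config);

  auto input_names = predictor->GetInputNames();
  auto input_handle = predictor->GetInputHandle(input_names[0]);

  std::vector<float> data(1 * 13, 0.5f);   // hypothetical 1 x 13 input
  input_handle->Reshape({1, 13});          // set shape before copying
  input_handle->CopyFromCpu(data.data());  // copy host data into the engine

  predictor->Run();
}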
core/general-server/op/general_response_op.cpp
@@ -40,6 +40,55 @@ using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;

int GeneralResponseOp::inference() {
  const Request *req = dynamic_cast<const Request *>(get_request_message());
  // response inst with only fetch_var_names
  Response *res = mutable_data<Response>();
  baidu::paddle_serving::predictor::Resource &resource =
      baidu::paddle_serving::predictor::Resource::instance();
  std::shared_ptr<PaddleGeneralModelConfig> model_config =
      resource.get_general_model_config();
  std::vector<int> capacity(req->fetch_var_names_size(), 1);
  std::string engine_name = "general_infer_0";
  ModelOutput *output = res->add_outputs();
  FetchInst *fetch_inst = output->add_insts();
  FetchInst *fetch_p = output->mutable_insts(0);
  std::vector<std::string> outs =
      InferManager::instance().GetOutputNames(engine_name.c_str());
  for (int i = 0; i < req->fetch_var_names_size(); ++i) {
    Tensor *tensor = fetch_inst->add_tensor_array();
    std::string tensor_name = outs[i];
    auto lod_tensor = InferManager::instance().GetOutputHandle(
        engine_name.c_str(), tensor_name.c_str());
    std::vector<int> shape = lod_tensor->shape();
    for (int k = 0; k < shape.size(); ++k) {
      capacity[i] *= shape[k];
      tensor->add_shape(shape[k]);
    }
    auto dtype = lod_tensor->type();
    if (dtype == paddle::PaddleDType::INT64) {
      std::vector<int64_t> datas(capacity[i]);
      int64_t *data_ptr = datas.data();
      lod_tensor->CopyToCpu(data_ptr);
      google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr,
                                                        data_ptr + capacity[i]);
      tensor->mutable_int64_data()->Swap(&tmp_data);
    } else if (dtype == paddle::PaddleDType::FLOAT32) {
      std::vector<float> datas(capacity[i]);
      float *data_ptr = datas.data();
      lod_tensor->CopyToCpu(data_ptr);
      google::protobuf::RepeatedField<float> tmp_data(data_ptr,
                                                      data_ptr + capacity[i]);
      tensor->mutable_float_data()->Swap(&tmp_data);
    } else if (dtype == paddle::PaddleDType::INT32) {
      std::vector<int32_t> datas(capacity[i]);
      int32_t *data_ptr = datas.data();
      lod_tensor->CopyToCpu(data_ptr);
      google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
                                                        data_ptr + capacity[i]);
      tensor->mutable_int_data()->Swap(&tmp_data);
    }
    std::vector<std::vector<size_t>> lod = lod_tensor->lod();
    if (lod.size() > 0) {
      for (int j = 0; j < lod[0].size(); ++j) {
        tensor->add_lod(lod[0][j]);
      }
    }
  }
  return 0;
}
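The fetch path is the mirror image of the feed path: resolve an output handle, size a host buffer from the handle's shape, and copy back. A minimal standalone sketch follows, again not part of the commit and assuming the same Paddle Inference 2.0 API as above; the choice of output index is a hypothetical placeholder.

#include <functional>
#include <numeric>
#include <vector>
#include "paddle_inference_api.h"

// Sketch only: fetch one float output through the handle-based API that
// GeneralResponseOp now uses via InferManager.
std::vector<float> fetch_example(paddle_infer::Predictor *predictor) {
  auto output_names = predictor->GetOutputNames();
  auto output_handle = predictor->GetOutputHandle(output_names[0]);

  // capacity = product of the output dims, as in the op above
  std::vector<int> shape = output_handle->shape();
  int capacity = std::accumulate(shape.begin(), shape.end(), 1,
                                 std::multiplies<int>());

  std::vector<float> result(capacity);
  output_handle->CopyToCpu(result.data());  // copy engine output back to host
  return result;
}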
core/predictor/framework/infer.h
@@ -119,6 +119,8 @@ class InferEngine {
  virtual int thrd_finalize_impl() = 0;
  virtual int thrd_clear_impl() = 0;
  virtual int proc_finalize_impl() = 0;
  virtual std::vector<std::string> GetInputNames() = 0;
  virtual std::vector<std::string> GetOutputNames() = 0;
  virtual std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
      const std::string &name) = 0;
  virtual std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
      const std::string &name) = 0;
  virtual int infer_impl() = 0;

@@ -514,6 +516,22 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
 public:  // NOLINT
  FluidInferEngine() {}
  ~FluidInferEngine() {}
  std::vector<std::string> GetInputNames() {
    FluidFamilyCore *core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
    }
    return core->GetInputNames();
  }
  std::vector<std::string> GetOutputNames() {
    FluidFamilyCore *core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
    }
    return core->GetOutputNames();
  }
  std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string &name) {
    FluidFamilyCore *core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
    if (!core || !core->get()) {

@@ -677,6 +695,20 @@ class VersionedInferEngine : public InferEngine {
    return engine->infer();
  }
  std::vector<std::string> GetInputNames() {
    InferEngine *engine = default_engine();
    if (!engine) {
      LOG(WARNING) << "fail to get default engine";
    }
    return engine->GetInputNames();
  }
  std::vector<std::string> GetOutputNames() {
    InferEngine *engine = default_engine();
    if (!engine) {
      LOG(WARNING) << "fail to get default engine";
    }
    return engine->GetOutputNames();
  }
  std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string &name) {
    InferEngine *engine = default_engine();
    if (!engine) {

@@ -718,6 +750,21 @@ class VersionedInferEngine : public InferEngine {
    return iter->second->infer();
  }
  std::vector<std::string> GetInputNames(uint64_t version) {
    auto iter = _versions.find(version);
    if (iter == _versions.end()) {
      LOG(ERROR) << "Not found version engine: " << version;
    }
    return iter->second->GetInputNames();
  }
  std::vector<std::string> GetOutputNames(uint64_t version) {
    auto iter = _versions.find(version);
    if (iter == _versions.end()) {
      LOG(ERROR) << "Not found version engine: " << version;
    }
    return iter->second->GetOutputNames();
  }
  std::unique_ptr<paddle_infer::Tensor> GetInputHandle(uint64_t version,
                                                       const std::string &name) {
    auto iter = _versions.find(version);

@@ -867,6 +914,21 @@ class InferManager {
    }
    return it->second->infer();
  }
  std::vector<std::string> GetInputNames(const char *model_name) {
    auto it = _map.find(model_name);
    if (it == _map.end()) {
      LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
    }
    return it->second->GetInputNames();
  }
  std::vector<std::string> GetOutputNames(const char *model_name) {
    auto it = _map.find(model_name);
    if (it == _map.end()) {
      LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
    }
    return it->second->GetOutputNames();
  }
  std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const char *model_name,
                                                       const std::string &name) {
    auto it = _map.find(model_name);
    if (it == _map.end()) {

@@ -908,6 +970,22 @@ class InferManager {
    }
    return it->second->infer(version);
  }
  std::vector<std::string> GetInputNames(const char *model_name,
                                         uint64_t version) {
    auto it = _map.find(model_name);
    if (it == _map.end()) {
      LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
    }
    return it->second->GetInputNames(version);
  }
  std::vector<std::string> GetOutputNames(const char *model_name,
                                          uint64_t version) {
    auto it = _map.find(model_name);
    if (it == _map.end()) {
      LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
    }
    return it->second->GetOutputNames(version);
  }
  std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const char *model_name,
                                                       uint64_t version,
                                                       const std::string &name) {
    auto it = _map.find(model_name);
    if (it == _map.end()) {
paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
@@ -64,10 +64,18 @@ using paddle_infer::CreatePredictor;
class FluidFamilyCore {
 public:
  virtual ~FluidFamilyCore() {}
  virtual std::vector<std::string> GetInputNames() {
    return _core->GetInputNames();
  }
  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string &name) {
    return _core->GetInputHandle(name);
  }
  virtual std::vector<std::string> GetOutputNames() {
    return _core->GetOutputNames();
  }
  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string &name) {
    return _core->GetOutputHandle(name);
  }
paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
@@ -61,31 +61,36 @@ class GlobalPaddleCreateMutex {
  pthread_mutex_t _mut;
};

class GlobalSigmoidCreateMutex {
 public:
  pthread_mutex_t &mutex() { return _mut; }
  static pthread_mutex_t &instance() {
    static GlobalSigmoidCreateMutex gmutex;
    return gmutex.mutex();
  }

 private:
  GlobalSigmoidCreateMutex() { pthread_mutex_init(&_mut, NULL); }
  pthread_mutex_t _mut;
};

using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;

// data interface
class FluidFamilyCore {
 public:
  virtual ~FluidFamilyCore() {}
  virtual bool Run(const void *in_data, void *out_data) {
    if (!_core->Run(*(std::vector<paddle::PaddleTensor> *)in_data,
                    (std::vector<paddle::PaddleTensor> *)out_data)) {
  virtual std::vector<std::string> GetInputNames() {
    return _core->GetInputNames();
  }
  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string &name) {
    return _core->GetInputHandle(name);
  }
  virtual std::vector<std::string> GetOutputNames() {
    return _core->GetOutputNames();
  }
  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string &name) {
    return _core->GetOutputHandle(name);
  }
  virtual bool Run() {
    if (!_core->Run()) {
      LOG(ERROR) << "Failed call Run with paddle predictor";
      return false;
    }
    return true;
  }
@@ -96,8 +101,8 @@ class FluidFamilyCore {
      LOG(ERROR) << "origin paddle Predictor is null.";
      return -1;
    }
    paddle::PaddlePredictor *p_predictor =
        (paddle::PaddlePredictor *)origin_core;
    Predictor *p_predictor = (Predictor *)origin_core;
    _core = p_predictor->Clone();
    if (_core.get() == NULL) {
      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;

@@ -109,7 +114,7 @@ class FluidFamilyCore {
  virtual void *get() { return _core.get(); }

 protected:
  std::unique_ptr<paddle::PaddlePredictor> _core;
  std::shared_ptr<Predictor> _core;
};

// infer interface
@@ -123,51 +128,19 @@ class FluidGpuAnalysisCore : public FluidFamilyCore {
      return -1;
    }

    paddle::AnalysisConfig analysis_config;
    analysis_config.SetParamsFile(data_path + "/__params__");
    analysis_config.SetProgFile(data_path + "/__model__");
    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
    analysis_config.SetCpuMathLibraryNumThreads(1);
    Config config;
    config.SetParamsFile(data_path + "/__params__");
    config.SetProgFile(data_path + "/__model__");
    config.EnableUseGpu(100, FLAGS_gpuid);
    config.SetCpuMathLibraryNumThreads(1);

    if (params.enable_memory_optimization()) {
      analysis_config.EnableMemoryOptim();
      config.EnableMemoryOptim();
    }

    analysis_config.SwitchSpecifyInputNames(true);
    config.SwitchSpecifyInputNames(true);
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core =
        paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

class FluidGpuNativeCore : public FluidFamilyCore {
 public:
  int create(const predictor::InferEngineCreationParams &params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
                 << data_path;
      return -1;
    }

    paddle::NativeConfig native_config;
    native_config.param_file = data_path + "/__params__";
    native_config.prog_file = data_path + "/__model__";
    native_config.use_gpu = true;
    native_config.fraction_of_gpu_memory = 0.01;
    native_config.device = FLAGS_gpuid;
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = paddle::CreatePaddlePredictor<paddle::NativeConfig,
                                          paddle::PaddleEngineKind::kNative>(
        native_config);
    _core = CreatePredictor(config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
@@ -188,81 +161,39 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
      return -1;
    }

    paddle::AnalysisConfig analysis_config;
    analysis_config.SetModel(data_path);
    analysis_config.EnableUseGpu(1500, FLAGS_gpuid);
    analysis_config.SwitchSpecifyInputNames(true);
    analysis_config.SetCpuMathLibraryNumThreads(1);
    Config config;
    config.SetModel(data_path);
    config.EnableUseGpu(1500, FLAGS_gpuid);
    config.SwitchSpecifyInputNames(true);
    config.SetCpuMathLibraryNumThreads(1);

    if (params.enable_memory_optimization()) {
      analysis_config.EnableMemoryOptim();
      config.EnableMemoryOptim();
    }

#if 0  // todo: support flexible shape
    int min_seq_len = 1;
    int max_seq_len = 512;
    int opt_seq_len = 128;
    int head_number = 12;
    int batch = 50;
    std::vector<int> min_in_shape = {batch, min_seq_len, 1};
    std::vector<int> max_in_shape = {batch, max_seq_len, 1};
    std::vector<int> opt_in_shape = {batch, opt_seq_len, 1};
    std::string input1_name = "src_text_a_ids";
    std::string input2_name = "pos_text_a_ids";
    std::string input3_name = "sent_text_a_ids";
    std::string input4_name = "stack_0.tmp_0";
    std::map<std::string, std::vector<int>> min_input_shape = {
        {input1_name, min_in_shape},
        {input2_name, min_in_shape},
        {input3_name, min_in_shape},
        {input4_name, {batch, head_number, min_seq_len, min_seq_len}},
    };
    std::map<std::string, std::vector<int>> max_input_shape = {
        {input1_name, max_in_shape},
        {input2_name, max_in_shape},
        {input3_name, max_in_shape},
        {input4_name, {batch, head_number, max_seq_len, max_seq_len}},
    };
    std::map<std::string, std::vector<int>> opt_input_shape = {
        {input1_name, opt_in_shape},
        {input2_name, opt_in_shape},
        {input3_name, opt_in_shape},
        {input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
    };
    analysis_config.SetTRTDynamicShapeInfo(
        min_input_shape, max_input_shape, opt_input_shape);
#endif
    int max_batch = 32;
    int min_subgraph_size = 3;
    if (params.use_trt()) {
      analysis_config.EnableTensorRtEngine(
      config.EnableTensorRtEngine(
          1 << 20,
          max_batch,
          min_subgraph_size,
          paddle::AnalysisConfig::Precision::kFloat32,
          Config::Precision::kFloat32,
          false,
          false);
      LOG(INFO) << "create TensorRT predictor";
    } else {
      if (params.enable_memory_optimization()) {
        analysis_config.EnableMemoryOptim();
        config.EnableMemoryOptim();
      }
      if (params.enable_ir_optimization()) {
        analysis_config.SwitchIrOptim(true);
        config.SwitchIrOptim(true);
      } else {
        analysis_config.SwitchIrOptim(false);
        config.SwitchIrOptim(false);
      }
    }
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core =
        paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
    _core = CreatePredictor(config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
@@ -273,34 +204,6 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
  }
};

class FluidGpuNativeDirCore : public FluidFamilyCore {
 public:
  int create(const predictor::InferEngineCreationParams &params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
                 << data_path;
      return -1;
    }

    paddle::NativeConfig native_config;
    native_config.model_dir = data_path;
    native_config.use_gpu = true;
    native_config.fraction_of_gpu_memory = 0.01;
    native_config.device = FLAGS_gpuid;
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = paddle::CreatePaddlePredictor<paddle::NativeConfig,
                                          paddle::PaddleEngineKind::kNative>(
        native_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

class Parameter {
 public:
@@ -383,214 +286,6 @@ class Parameter {
  float *_params;
};

class SigmoidModel {
 public:
  ~SigmoidModel() {}
  int load(const char *sigmoid_w_file,
           const char *sigmoid_b_file,
           float exp_max,
           float exp_min) {
    AutoLock lock(GlobalSigmoidCreateMutex::instance());
    if (0 != _sigmoid_w.init(2, 1, sigmoid_w_file) || 0 != _sigmoid_w.load()) {
      LOG(ERROR) << "load params sigmoid_w failed.";
      return -1;
    }
    VLOG(2) << "load sigmoid_w [" << _sigmoid_w._params[0] << "] ["
            << _sigmoid_w._params[1] << "].";
    if (0 != _sigmoid_b.init(2, 1, sigmoid_b_file) || 0 != _sigmoid_b.load()) {
      LOG(ERROR) << "load params sigmoid_b failed.";
      return -1;
    }
    VLOG(2) << "load sigmoid_b [" << _sigmoid_b._params[0] << "] ["
            << _sigmoid_b._params[1] << "].";
    _exp_max_input = exp_max;
    _exp_min_input = exp_min;
    return 0;
  }
  int softmax(float x, double &o) {  // NOLINT
    float _y0 = x * _sigmoid_w._params[0] + _sigmoid_b._params[0];
    float _y1 = x * _sigmoid_w._params[1] + _sigmoid_b._params[1];
    _y0 = (_y0 > _exp_max_input)
              ? _exp_max_input
              : ((_y0 < _exp_min_input) ? _exp_min_input : _y0);
    _y1 = (_y1 > _exp_max_input)
              ? _exp_max_input
              : ((_y1 < _exp_min_input) ? _exp_min_input : _y1);
    o = 1.0f / (1.0f + exp(_y0 - _y1));
    return 0;
  }

 public:
  Parameter _sigmoid_w;
  Parameter _sigmoid_b;
  float _exp_max_input;
  float _exp_min_input;
};

class SigmoidFluidModel {
 public:
  int softmax(float x, double &o) {  // NOLINT
    return _sigmoid_core->softmax(x, o);
  }  // NOLINT
  std::unique_ptr<SigmoidFluidModel> Clone() {
    std::unique_ptr<SigmoidFluidModel> clone_model;
    clone_model.reset(new SigmoidFluidModel());
    clone_model->_sigmoid_core = _sigmoid_core;
    clone_model->_fluid_core = _fluid_core->Clone();
    return std::move(clone_model);
  }

 public:
  std::unique_ptr<paddle::PaddlePredictor> _fluid_core;
  std::shared_ptr<SigmoidModel> _sigmoid_core;
};

class FluidGpuWithSigmoidCore : public FluidFamilyCore {
 public:
  virtual ~FluidGpuWithSigmoidCore() {}

 public:
  int create(const predictor::InferEngineCreationParams &params) {
    std::string model_path = params.get_path();
    size_t pos = model_path.find_last_of("/\\");
    std::string conf_path = model_path.substr(0, pos);
    std::string conf_file = model_path.substr(pos);
    configure::SigmoidConf conf;
    if (configure::read_proto_conf(conf_path, conf_file, &conf) != 0) {
      LOG(ERROR) << "failed load model path: " << model_path;
      return -1;
    }

    _core.reset(new SigmoidFluidModel);

    std::string fluid_model_data_path = conf.dnn_model_path();
    predictor::InferEngineCreationParams new_params(params);
    new_params.set_path(fluid_model_data_path);
    int ret = load_fluid_model(new_params);
    if (ret < 0) {
      LOG(ERROR) << "fail to load fluid model.";
      return -1;
    }
    const char *sigmoid_w_file = conf.sigmoid_w_file().c_str();
    const char *sigmoid_b_file = conf.sigmoid_b_file().c_str();
    float exp_max = conf.exp_max_input();
    float exp_min = conf.exp_min_input();
    _core->_sigmoid_core.reset(new SigmoidModel);
    LOG(INFO) << "create sigmoid core[" << _core->_sigmoid_core.get()
              << "], use count[" << _core->_sigmoid_core.use_count() << "].";
    ret = _core->_sigmoid_core->load(
        sigmoid_w_file, sigmoid_b_file, exp_max, exp_min);
    if (ret < 0) {
      LOG(ERROR) << "fail to load sigmoid model.";
      return -1;
    }
    return 0;
  }

  virtual bool Run(const void *in_data, void *out_data) {
    if (!_core->_fluid_core->Run(
            *(std::vector<paddle::PaddleTensor> *)in_data,
            (std::vector<paddle::PaddleTensor> *)out_data)) {
      LOG(ERROR) << "Failed call Run with paddle predictor";
      return false;
    }
    return true;
  }

  virtual int clone(SigmoidFluidModel *origin_core) {
    if (origin_core == NULL) {
      LOG(ERROR) << "origin paddle Predictor is null.";
      return -1;
    }
    _core = origin_core->Clone();
    if (_core.get() == NULL) {
      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
      return -1;
    }
    LOG(INFO) << "clone sigmoid core[" << _core->_sigmoid_core.get()
              << "] use count[" << _core->_sigmoid_core.use_count() << "].";
    return 0;
  }

  virtual SigmoidFluidModel *get() { return _core.get(); }

  virtual int load_fluid_model(
      const predictor::InferEngineCreationParams &params) = 0;

  int softmax(float x, double &o) {  // NOLINT
    return _core->_sigmoid_core->softmax(x, o);
  }

 protected:
  std::unique_ptr<SigmoidFluidModel> _core;
};

class FluidGpuNativeDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
 public:
  int load_fluid_model(const predictor::InferEngineCreationParams &params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
                 << data_path;
      return -1;
    }

    paddle::NativeConfig native_config;
    native_config.model_dir = data_path;
    native_config.use_gpu = true;
    native_config.fraction_of_gpu_memory = 0.01;
    native_config.device = FLAGS_gpuid;
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core->_fluid_core =
        paddle::CreatePaddlePredictor<paddle::NativeConfig,
                                      paddle::PaddleEngineKind::kNative>(
            native_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

class FluidGpuAnalysisDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
 public:
  int load_fluid_model(const predictor::InferEngineCreationParams &params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
                 << data_path;
      return -1;
    }

    paddle::AnalysisConfig analysis_config;
    analysis_config.SetModel(data_path);
    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
    analysis_config.SwitchSpecifyInputNames(true);
    analysis_config.SetCpuMathLibraryNumThreads(1);

    if (params.enable_memory_optimization()) {
      analysis_config.EnableMemoryOptim();
    }

    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core->_fluid_core =
        paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};

}  // namespace fluid_gpu
}  // namespace paddle_serving
}  // namespace baidu
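The GPU engine header above swaps the Paddle 1.x creation path (paddle::AnalysisConfig with paddle::CreatePaddlePredictor) for the 2.0 path (paddle_infer::Config with paddle_infer::CreatePredictor). Below is a minimal sketch of the new creation pattern only, not part of the commit; the model directory and GPU id are hypothetical, and it assumes a GPU build of Paddle Inference 2.0.

#include <memory>
#include "paddle_inference_api.h"

// Sketch only: build a GPU predictor roughly the way the rewritten
// FluidGpuAnalysisDirCore::create() does.
std::shared_ptr<paddle_infer::Predictor> create_gpu_predictor() {
  paddle_infer::Config config;
  config.SetModel("./serving_server_model");  // hypothetical model directory
  config.EnableUseGpu(1500, 0);               // 1500 MB initial pool on GPU 0
  config.SwitchSpecifyInputNames(true);
  config.SetCpuMathLibraryNumThreads(1);
  config.EnableMemoryOptim();
  return paddle_infer::CreatePredictor(config);
}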
paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
@@ -32,28 +32,6 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_GPU_ANALYSIS_DIR");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<
        FluidGpuAnalysisDirWithSigmoidCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_GPU_ANALYSIS_DIR_SIGMOID");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuNativeCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_GPU_NATIVE");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuNativeDirCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_GPU_NATIVE_DIR");

REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::FluidInferEngine<
        FluidGpuNativeDirWithSigmoidCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_GPU_NATIVE_DIR_SIGMOID");

}  // namespace fluid_gpu
}  // namespace paddle_serving
}  // namespace baidu