Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Serving
提交
a5cdb95f
S
Serving
项目概览
PaddlePaddle
/
Serving
大约 1 年 前同步成功
通知
186
Star
833
Fork
253
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
105
列表
看板
标记
里程碑
合并请求
10
Wiki
2
Wiki
分析
仓库
DevOps
项目成员
Pages
S
Serving
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
105
Issue
105
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
2
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a5cdb95f
编写于
9月 07, 2021
作者:
T
TeslaZhao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cube cache
上级
0e58dbfb
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
94 addition
and
40 deletion
+94
-40
core/general-server/op/general_dist_kv_infer_op.cpp
core/general-server/op/general_dist_kv_infer_op.cpp
+18
-21
core/predictor/framework/cache.cpp
core/predictor/framework/cache.cpp
+7
-5
core/predictor/framework/infer.h
core/predictor/framework/infer.h
+69
-14
未找到文件。
core/general-server/op/general_dist_kv_infer_op.cpp
浏览文件 @
a5cdb95f
...
@@ -130,14 +130,19 @@ int GeneralDistKVInferOp::inference() {
...
@@ -130,14 +130,19 @@ int GeneralDistKVInferOp::inference() {
// fitler cache keys
// fitler cache keys
size_t
hit_counts
=
0
;
size_t
hit_counts
=
0
;
int64_t
seek_cache_start
=
timeline
.
TimeStampUS
();
CubeCache
*
p_cube_cache
=
CubeCache
*
p_cube_cache
=
InferManager
::
instance
().
get_cube_cache
(
engine_name
().
c_str
());
InferManager
::
instance
().
get_cube_cache
(
engine_name
().
c_str
());
if
(
p_cube_cache
!=
nullptr
)
{
if
(
p_cube_cache
!=
nullptr
)
{
for
(
size_t
i
=
0
;
i
<
unique_keys_count
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
unique_keys_count
;
++
i
)
{
rec
::
mcube
::
CubeValue
*
hit_val
=
p_cube_cache
->
get_data
(
unique_keys
[
i
]);
rec
::
mcube
::
CubeValue
*
hit_val
=
p_cube_cache
->
get_data
(
unique_keys
[
i
]);
if
(
hit_val
)
{
if
(
hit_val
)
{
LOG
(
WARNING
)
<<
"Hit one cache. key:"
<<
unique_keys
[
i
];
//
LOG(WARNING) << "Hit one cache. key:" << unique_keys[i];
key_map
[
unique_keys
[
i
]]
=
hit_val
;
key_map
[
unique_keys
[
i
]]
=
hit_val
;
if
(
hit_counts
%
100
==
0
)
{
LOG
(
WARNING
)
<<
"hit cache! key:"
<<
unique_keys
[
i
]
<<
" value:"
<<
hit_val
->
buff
;
}
unique_keys
[
i
]
=
0
;
unique_keys
[
i
]
=
0
;
++
hit_counts
;
++
hit_counts
;
}
}
...
@@ -156,8 +161,10 @@ int GeneralDistKVInferOp::inference() {
...
@@ -156,8 +161,10 @@ int GeneralDistKVInferOp::inference() {
}
}
}
}
}
}
LOG
(
WARNING
)
<<
"Hit "
<<
hit_counts
int64_t
seek_cache_end
=
timeline
.
TimeStampUS
();
<<
" keys in cube cache, unique_keys:"
<<
unique_keys
.
size
();
VLOG
(
2
)
<<
"cache hit "
<<
hit_counts
<<
" keys in cube cache, last unique_keys:"
<<
unique_keys
.
size
()
<<
" , seek_time:"
<<
seek_cache_end
-
seek_cache_start
;
// seek sparse params
// seek sparse params
rec
::
mcube
::
CubeAPI
*
cube
=
rec
::
mcube
::
CubeAPI
::
instance
();
rec
::
mcube
::
CubeAPI
*
cube
=
rec
::
mcube
::
CubeAPI
::
instance
();
...
@@ -170,7 +177,8 @@ int GeneralDistKVInferOp::inference() {
...
@@ -170,7 +177,8 @@ int GeneralDistKVInferOp::inference() {
int
ret
=
cube
->
seek
(
table_names
[
0
],
unique_keys
,
&
values
);
int
ret
=
cube
->
seek
(
table_names
[
0
],
unique_keys
,
&
values
);
int64_t
seek_end
=
timeline
.
TimeStampUS
();
int64_t
seek_end
=
timeline
.
TimeStampUS
();
VLOG
(
2
)
<<
"(logid="
<<
log_id
<<
") cube seek status: "
<<
ret
VLOG
(
2
)
<<
"(logid="
<<
log_id
<<
") cube seek status: "
<<
ret
<<
" seek_time: "
<<
seek_end
-
seek_start
;
<<
" , unique_key: "
<<
unique_keys
.
size
()
<<
" , seek_time: "
<<
seek_end
-
seek_start
;
for
(
size_t
i
=
0
;
i
<
unique_keys
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
unique_keys
.
size
();
++
i
)
{
key_map
[
unique_keys
[
i
]]
=
&
values
[
i
];
key_map
[
unique_keys
[
i
]]
=
&
values
[
i
];
...
@@ -179,7 +187,8 @@ int GeneralDistKVInferOp::inference() {
...
@@ -179,7 +187,8 @@ int GeneralDistKVInferOp::inference() {
LOG
(
ERROR
)
<<
"cube value return null"
;
LOG
(
ERROR
)
<<
"cube value return null"
;
}
}
// size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
// size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
size_t
EMBEDDING_SIZE
=
(
values
[
0
].
buff
.
size
()
-
10
)
/
sizeof
(
float
);
// size_t EMBEDDING_SIZE = (values[0].buff.size() - 10) / sizeof(float);
size_t
EMBEDDING_SIZE
=
9
;
TensorVector
sparse_out
;
TensorVector
sparse_out
;
sparse_out
.
resize
(
sparse_count
);
sparse_out
.
resize
(
sparse_count
);
TensorVector
dense_out
;
TensorVector
dense_out
;
...
@@ -194,18 +203,12 @@ int GeneralDistKVInferOp::inference() {
...
@@ -194,18 +203,12 @@ int GeneralDistKVInferOp::inference() {
resource
.
get_general_model_config
().
front
();
resource
.
get_general_model_config
().
front
();
int
cube_key_found
=
0
;
int
cube_key_found
=
0
;
int
cube_key_miss
=
0
;
int
cube_key_miss
=
0
;
for
(
size_t
i
=
0
;
i
<
in
->
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
in
->
size
();
++
i
)
{
VLOG
(
2
)
<<
"i: "
<<
i
<<
", sparse_idx: "
<<
sparse_idx
<<
", dense_idx: "
<<
dense_idx
;
VLOG
(
2
)
<<
"i: "
<<
i
<<
", dtype: "
<<
in
->
at
(
i
).
dtype
;
if
(
in
->
at
(
i
).
dtype
!=
paddle
::
PaddleDType
::
INT64
)
{
if
(
in
->
at
(
i
).
dtype
!=
paddle
::
PaddleDType
::
INT64
)
{
dense_out
[
dense_idx
]
=
in
->
at
(
i
);
dense_out
[
dense_idx
]
=
in
->
at
(
i
);
++
dense_idx
;
++
dense_idx
;
continue
;
continue
;
}
}
VLOG
(
2
)
<<
"in->size: "
<<
in
->
size
()
<<
", "
;
VLOG
(
2
)
<<
"lod.size: "
<<
in
->
at
(
i
).
lod
.
size
();
sparse_out
[
sparse_idx
].
lod
.
resize
(
in
->
at
(
i
).
lod
.
size
());
sparse_out
[
sparse_idx
].
lod
.
resize
(
in
->
at
(
i
).
lod
.
size
());
for
(
size_t
x
=
0
;
x
<
sparse_out
[
sparse_idx
].
lod
.
size
();
++
x
)
{
for
(
size_t
x
=
0
;
x
<
sparse_out
[
sparse_idx
].
lod
.
size
();
++
x
)
{
sparse_out
[
sparse_idx
].
lod
[
x
].
resize
(
in
->
at
(
i
).
lod
[
x
].
size
());
sparse_out
[
sparse_idx
].
lod
[
x
].
resize
(
in
->
at
(
i
).
lod
[
x
].
size
());
...
@@ -228,21 +231,17 @@ int GeneralDistKVInferOp::inference() {
...
@@ -228,21 +231,17 @@ int GeneralDistKVInferOp::inference() {
for
(
int
x
=
0
;
x
<
sparse_out
[
sparse_idx
].
lod
[
0
].
back
();
++
x
)
{
for
(
int
x
=
0
;
x
<
sparse_out
[
sparse_idx
].
lod
[
0
].
back
();
++
x
)
{
float
*
data_ptr
=
dst_ptr
+
x
*
EMBEDDING_SIZE
;
float
*
data_ptr
=
dst_ptr
+
x
*
EMBEDDING_SIZE
;
uint64_t
cur_key
=
keys
[
cube_val_idx
];
uint64_t
cur_key
=
keys
[
cube_val_idx
];
VLOG
(
2
)
<<
"(logid="
<<
log_id
<<
") x: "
<<
x
<<
", sparse_idx: "
<<
sparse_idx
<<
" cur_key: "
<<
cur_key
<<
", cube_val_idx:"
<<
cube_val_idx
;
rec
::
mcube
::
CubeValue
*
cur_val
=
key_map
[
cur_key
];
rec
::
mcube
::
CubeValue
*
cur_val
=
key_map
[
cur_key
];
if
(
cur_val
->
buff
.
size
()
==
0
)
{
if
(
cur_val
->
buff
.
size
()
==
0
)
{
memset
(
data_ptr
,
(
float
)
0.0
,
sizeof
(
float
)
*
EMBEDDING_SIZE
);
memset
(
data_ptr
,
(
float
)
0.0
,
sizeof
(
float
)
*
EMBEDDING_SIZE
);
VLOG
(
3
)
<<
"(logid="
<<
log_id
<<
") cube key not found: "
<<
keys
[
cube_val_idx
];
++
cube_key_miss
;
++
cube_key_miss
;
++
cube_val_idx
;
++
cube_val_idx
;
continue
;
continue
;
}
}
VLOG
(
2
)
<<
"(logid="
<<
log_id
<<
") key: "
<<
keys
[
cube_val_idx
]
<<
" , cube value len:"
<<
cur_val
->
buff
.
size
();
// The data generated by pslib has 10 bytes of information to be filtered
memcpy
(
data_ptr
,
cur_val
->
buff
.
data
(),
cur_val
->
buff
.
size
());
// out
memcpy
(
data_ptr
,
cur_val
->
buff
.
data
()
+
10
,
cur_val
->
buff
.
size
()
-
10
);
// VLOG(3) << keys[cube_val_idx] << ":" << data_ptr[0] << ", " <<
// VLOG(3) << keys[cube_val_idx] << ":" << data_ptr[0] << ", " <<
// data_ptr[1] << ", " <<data_ptr[2] << ", " <<data_ptr[3] << ", "
// data_ptr[1] << ", " <<data_ptr[2] << ", " <<data_ptr[3] << ", "
// <<data_ptr[4] << ", " <<data_ptr[5] << ", " <<data_ptr[6] << ", "
// <<data_ptr[4] << ", " <<data_ptr[5] << ", " <<data_ptr[6] << ", "
...
@@ -255,8 +254,6 @@ int GeneralDistKVInferOp::inference() {
...
@@ -255,8 +254,6 @@ int GeneralDistKVInferOp::inference() {
bool
cube_fail
=
(
cube_key_found
==
0
);
bool
cube_fail
=
(
cube_key_found
==
0
);
if
(
cube_fail
)
{
if
(
cube_fail
)
{
LOG
(
WARNING
)
<<
"(logid="
<<
log_id
<<
") cube seek fail"
;
LOG
(
WARNING
)
<<
"(logid="
<<
log_id
<<
") cube seek fail"
;
// CopyBlobInfo(input_blob, output_blob);
// return 0;
}
}
VLOG
(
2
)
<<
"(logid="
<<
log_id
<<
") cube key found: "
<<
cube_key_found
VLOG
(
2
)
<<
"(logid="
<<
log_id
<<
") cube key found: "
<<
cube_key_found
<<
" , cube key miss: "
<<
cube_key_miss
;
<<
" , cube key miss: "
<<
cube_key_miss
;
...
...
core/predictor/framework/cache.cpp
浏览文件 @
a5cdb95f
...
@@ -12,12 +12,12 @@
...
@@ -12,12 +12,12 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License
// limitations under the License
#include "core/predictor/framework/cache.h"
#include <dirent.h>
#include <dirent.h>
#include <sys/stat.h>
#include <fstream>
#include <fstream>
#include <string>
#include <string>
#include <sys/stat.h>
#include <utility>
#include <utility>
#include "core/predictor/framework/cache.h"
#include "core/cube/cube-builder/include/cube-builder/seqfile_reader.h"
#include "core/cube/cube-builder/include/cube-builder/seqfile_reader.h"
namespace
baidu
{
namespace
baidu
{
...
@@ -30,8 +30,9 @@ int CubeCache::clear() {
...
@@ -30,8 +30,9 @@ int CubeCache::clear() {
delete
(
it
->
second
);
delete
(
it
->
second
);
it
->
second
=
nullptr
;
it
->
second
=
nullptr
;
}
}
_map_cache
.
clear
();
}
}
_map_cache
.
clear
();
return
0
;
return
0
;
}
}
...
@@ -44,6 +45,7 @@ rec::mcube::CubeValue* CubeCache::get_data(uint64_t key) {
...
@@ -44,6 +45,7 @@ rec::mcube::CubeValue* CubeCache::get_data(uint64_t key) {
}
}
int
CubeCache
::
reload_data
(
const
std
::
string
&
cache_path
)
{
int
CubeCache
::
reload_data
(
const
std
::
string
&
cache_path
)
{
LOG
(
INFO
)
<<
"cube cache is loading data, path: "
<<
cache_path
;
DIR
*
dp
=
nullptr
;
DIR
*
dp
=
nullptr
;
struct
dirent
*
dirp
=
nullptr
;
struct
dirent
*
dirp
=
nullptr
;
struct
stat
st
;
struct
stat
st
;
...
@@ -71,7 +73,7 @@ int CubeCache::reload_data(const std::string& cache_path) {
...
@@ -71,7 +73,7 @@ int CubeCache::reload_data(const std::string& cache_path) {
}
}
// Match the file whose name prefix is 'part-'
// Match the file whose name prefix is 'part-'
if
(
std
::
string
(
dirp
->
d_name
).
find
(
"part-"
)
!=
std
::
string
::
npos
)
{
if
(
std
::
string
(
dirp
->
d_name
).
find
(
"part-"
)
!=
std
::
string
::
npos
)
{
SequenceFileRecordReader
reader
(
dirp
->
d_name
);
SequenceFileRecordReader
reader
(
cache_path
+
"/"
+
dirp
->
d_name
);
if
(
reader
.
open
()
!=
0
)
{
if
(
reader
.
open
()
!=
0
)
{
LOG
(
ERROR
)
<<
"open file failed! "
<<
dirp
->
d_name
;
LOG
(
ERROR
)
<<
"open file failed! "
<<
dirp
->
d_name
;
...
@@ -97,7 +99,7 @@ int CubeCache::reload_data(const std::string& cache_path) {
...
@@ -97,7 +99,7 @@ int CubeCache::reload_data(const std::string& cache_path) {
}
}
rec
::
mcube
::
CubeValue
*
new_value
=
new
rec
::
mcube
::
CubeValue
();
rec
::
mcube
::
CubeValue
*
new_value
=
new
rec
::
mcube
::
CubeValue
();
new_value
->
error
=
0
;
new_value
->
error
=
0
;
new_value
->
buff
=
record
.
value
;
new_value
->
buff
.
swap
(
record
.
value
)
;
_map_cache
.
insert
(
std
::
make_pair
(
key
,
new_value
));
_map_cache
.
insert
(
std
::
make_pair
(
key
,
new_value
));
}
}
...
...
core/predictor/framework/infer.h
浏览文件 @
a5cdb95f
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#pragma once
#pragma once
#include <pthread.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/types.h>
#include <unistd.h>
#include <unistd.h>
#include <functional>
#include <functional>
...
@@ -169,8 +170,10 @@ class ReloadableInferEngine : public InferEngine {
...
@@ -169,8 +170,10 @@ class ReloadableInferEngine : public InferEngine {
uint32_t
_infer_batch_size
;
uint32_t
_infer_batch_size
;
// Need to align batch_size in inferring
// Need to align batch_size in inferring
bool
_infer_
batch_alig
n
;
bool
_infer_
overru
n
;
// allow to split request in inferring
bool
_allow_split_request
;
// model version
// model version
uint64_t
_version
;
uint64_t
_version
;
};
};
...
@@ -181,19 +184,23 @@ struct ModelData {
...
@@ -181,19 +184,23 @@ struct ModelData {
ModelData
()
:
current_idx
(
1
)
{
ModelData
()
:
current_idx
(
1
)
{
cores
[
0
]
=
nullptr
;
cores
[
0
]
=
nullptr
;
cores
[
1
]
=
nullptr
;
cores
[
1
]
=
nullptr
;
caches
[
0
]
=
nullptr
;
caches
[
1
]
=
nullptr
;
}
}
~
ModelData
()
{
~
ModelData
()
{
delete
cores
[
0
];
delete
cores
[
0
];
delete
cores
[
1
];
delete
cores
[
1
];
delete
caches
[
0
];
delete
caches
[
1
];
}
}
void
*
get_core
()
{
return
cores
[
current_idx
]
->
get
();
}
void
*
get_core
()
{
return
cores
[
current_idx
]
->
get
();
}
CubeCache
*
get_cache
()
{
return
&
caches
[
current_idx
];
}
CubeCache
*
get_cache
()
{
return
caches
[
current_idx
];
}
EngineCore
*
cores
[
2
];
EngineCore
*
cores
[
2
];
CubeCache
caches
[
2
];
CubeCache
*
caches
[
2
];
uint32_t
current_idx
;
uint32_t
current_idx
;
};
};
...
@@ -239,31 +246,46 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
...
@@ -239,31 +246,46 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
delete
md
->
cores
[
next_idx
];
delete
md
->
cores
[
next_idx
];
}
}
md
->
cores
[
next_idx
]
=
new
(
std
::
nothrow
)
EngineCore
;
md
->
cores
[
next_idx
]
=
new
(
std
::
nothrow
)
EngineCore
;
if
(
nullptr
==
md
->
cores
[
next_idx
])
{
LOG
(
ERROR
)
<<
"Allocating memory failed. "
;
return
-
1
;
}
size_t
gpu_ids_num
=
conf
.
gpu_ids_size
();
size_t
gpu_ids_num
=
conf
.
gpu_ids_size
();
im
::
bsf
::
AutoMutex
lock
(
_mutex
);
im
::
bsf
::
AutoMutex
lock
(
_mutex
);
int
gpu_id
=
-
1
;
int
gpu_id
=
-
1
;
if
(
gpu_ids_num
>
0
)
{
if
(
gpu_ids_num
>
0
)
{
gpu_id
=
conf
.
gpu_ids
(
_gpu_index
%
gpu_ids_num
);
gpu_id
=
conf
.
gpu_ids
(
_gpu_index
%
gpu_ids_num
);
}
}
LOG
(
WARNING
)
<<
"Loading EngineCore["
<<
next_idx
<<
"] ..."
;
if
(
!
md
->
cores
[
next_idx
]
||
if
(
!
md
->
cores
[
next_idx
]
||
md
->
cores
[
next_idx
]
->
create
(
conf
,
gpu_id
)
!=
0
)
{
md
->
cores
[
next_idx
]
->
create
(
conf
,
gpu_id
)
!=
0
)
{
LOG
(
ERROR
)
<<
"Failed create model, path: "
<<
conf
.
model_dir
();
LOG
(
ERROR
)
<<
"Failed create model, path: "
<<
conf
.
model_dir
();
return
-
1
;
return
-
1
;
}
}
_gpu_index
++
;
_gpu_index
++
;
LOG
(
WARNING
)
<<
"
Reload EngineCore["
<<
next_idx
<<
"] finish
."
;
LOG
(
WARNING
)
<<
"
Loading EngineCore["
<<
next_idx
<<
"] done
."
;
// reload cube cache
// reload cube cache
if
(
nullptr
==
md
->
caches
[
next_idx
])
{
md
->
caches
[
next_idx
]
=
new
(
std
::
nothrow
)
CubeCache
;
}
if
(
nullptr
==
md
->
caches
[
next_idx
])
{
LOG
(
ERROR
)
<<
"Allocating memory failed."
;
return
-
1
;
}
LOG
(
WARNING
)
<<
"Loading cube cache["
<<
next_idx
<<
"] ..."
;
std
::
string
model_path
=
conf
.
model_dir
();
std
::
string
model_path
=
conf
.
model_dir
();
if
(
access
(
model_path
.
c_str
(),
F_OK
)
==
0
)
{
if
(
access
(
model_path
.
c_str
(),
F_OK
)
==
0
)
{
std
::
string
cube_cache_path
=
model_path
+
"cube_cache"
;
std
::
string
cube_cache_path
=
model_path
+
"cube_cache"
;
int
reload_cache_ret
=
md
->
caches
[
next_idx
]
.
reload_data
(
cube_cache_path
);
int
reload_cache_ret
=
md
->
caches
[
next_idx
]
->
reload_data
(
cube_cache_path
);
LOG
(
WARNING
)
<<
"
Reload cube cache["
<<
next_idx
<<
"] finish
."
;
LOG
(
WARNING
)
<<
"
Loading cube cache["
<<
next_idx
<<
"] done
."
;
}
else
{
}
else
{
LOG
(
ERROR
)
<<
"model_path "
<<
model_path
LOG
(
ERROR
)
<<
"model_path "
<<
model_path
<<
" is not exits. Ignore cube cache!"
;
<<
" is not exits. Ignore cube cache!"
;
}
}
// switch current_idx
md
->
current_idx
=
next_idx
;
md
->
current_idx
=
next_idx
;
LOG
(
WARNING
)
LOG
(
WARNING
)
<<
"Reload model and cube cache done. switching to current_idx["
<<
"Reload model and cube cache done. switching to current_idx["
...
@@ -369,12 +391,20 @@ class CloneDBReloadableInferEngine
...
@@ -369,12 +391,20 @@ class CloneDBReloadableInferEngine
virtual
int
load_data
(
ModelData
<
EngineCore
>*
md
,
virtual
int
load_data
(
ModelData
<
EngineCore
>*
md
,
const
configure
::
EngineDesc
&
conf
)
{
const
configure
::
EngineDesc
&
conf
)
{
int
tid
=
syscall
(
SYS_gettid
);
uint32_t
next_idx
=
(
md
->
current_idx
+
1
)
%
2
;
uint32_t
next_idx
=
(
md
->
current_idx
+
1
)
%
2
;
if
(
md
->
cores
[
next_idx
])
{
if
(
md
->
cores
[
next_idx
])
{
delete
md
->
cores
[
next_idx
];
delete
md
->
cores
[
next_idx
];
}
}
md
->
cores
[
next_idx
]
=
new
(
std
::
nothrow
)
EngineCore
;
md
->
cores
[
next_idx
]
=
new
(
std
::
nothrow
)
EngineCore
;
if
(
nullptr
==
md
->
caches
[
next_idx
])
{
md
->
caches
[
next_idx
]
=
new
(
std
::
nothrow
)
CubeCache
;
}
if
(
nullptr
==
md
->
cores
[
next_idx
]
||
nullptr
==
md
->
caches
[
next_idx
])
{
LOG
(
ERROR
)
<<
"Allocating memory fail."
;
return
-
1
;
}
// params.dump();
// params.dump();
// gpu_ids_num > 0 is always true.
// gpu_ids_num > 0 is always true.
// if use CPU, gpu_ids = [-1].
// if use CPU, gpu_ids = [-1].
...
@@ -390,20 +420,34 @@ class CloneDBReloadableInferEngine
...
@@ -390,20 +420,34 @@ class CloneDBReloadableInferEngine
}
else
{
}
else
{
gpu_ids_num
=
1
;
gpu_ids_num
=
1
;
}
}
// _gpu_index will be set to be 0, when load() or proc_initial() is called.
// _gpu_index will be set to be 0, when load() or proc_initial() is called.
// _gpu_index < gpu_ids_num, means there are predictors still not create
// _gpu_index < gpu_ids_num, means there are predictors still not create
// on some GPU card.
// on some GPU card.
// so we need to create the predictor.
// so we need to create the predictor.
// _gpu_index >= gpu_ids_num, means each GPU card has already create one.
// _gpu_index >= gpu_ids_num, means each GPU card has already create one.
// so we need to clone the predictor.
// so we need to clone the predictor.
LOG
(
WARNING
)
<<
"tid:"
<<
tid
<<
" Loading clone model ..."
;
if
(
DBReloadableInferEngine
<
EngineCore
>::
_gpu_index
<
gpu_ids_num
)
{
if
(
DBReloadableInferEngine
<
EngineCore
>::
_gpu_index
<
gpu_ids_num
)
{
if
(
!
md
->
cores
[
next_idx
]
||
// create cores
md
->
cores
[
next_idx
]
->
create
(
conf
,
gpu_id
)
!=
0
)
{
if
(
md
->
cores
[
next_idx
]
->
create
(
conf
,
gpu_id
)
!=
0
)
{
LOG
(
ERROR
)
<<
"Failed create model, path: "
<<
conf
.
model_dir
();
LOG
(
ERROR
)
<<
"Failed create model, path: "
<<
conf
.
model_dir
();
return
-
1
;
return
-
1
;
}
}
// create caches
std
::
string
model_path
=
conf
.
model_dir
();
if
(
access
(
model_path
.
c_str
(),
F_OK
)
==
0
)
{
std
::
string
cube_cache_path
=
model_path
+
"cube_cache"
;
int
reload_cache_ret
=
md
->
caches
[
next_idx
]
->
reload_data
(
cube_cache_path
);
LOG
(
WARNING
)
<<
"create cube cache["
<<
next_idx
<<
"] done."
;
}
else
{
LOG
(
WARNING
)
<<
"model_path "
<<
model_path
<<
" is not exits. Ignore cube cache!"
;
}
DBReloadableInferEngine
<
EngineCore
>::
_gpu_index
++
;
DBReloadableInferEngine
<
EngineCore
>::
_gpu_index
++
;
md
->
current_idx
=
next_idx
;
//
md->current_idx = next_idx;
if
(
_cloneTemplate
.
size
()
<
if
(
_cloneTemplate
.
size
()
<
DBReloadableInferEngine
<
EngineCore
>::
_gpu_index
)
{
DBReloadableInferEngine
<
EngineCore
>::
_gpu_index
)
{
_cloneTemplate
.
push_back
(
md
);
_cloneTemplate
.
push_back
(
md
);
...
@@ -414,18 +458,27 @@ class CloneDBReloadableInferEngine
...
@@ -414,18 +458,27 @@ class CloneDBReloadableInferEngine
}
else
{
}
else
{
int
template_index
=
DBReloadableInferEngine
<
EngineCore
>::
_gpu_index
%
int
template_index
=
DBReloadableInferEngine
<
EngineCore
>::
_gpu_index
%
_cloneTemplate
.
size
();
_cloneTemplate
.
size
();
if
(
!
md
->
cores
[
next_idx
]
||
md
->
cores
[
next_idx
]
->
clone
(
// clone cores
if
(
md
->
cores
[
next_idx
]
->
clone
(
_cloneTemplate
[
template_index
]
->
get_core
())
!=
0
)
{
_cloneTemplate
[
template_index
]
->
get_core
())
!=
0
)
{
LOG
(
ERROR
)
<<
"Failed clone model from core"
;
LOG
(
ERROR
)
<<
"Failed clone model from core"
;
return
-
1
;
return
-
1
;
}
}
// clone caches
md
->
caches
[
next_idx
]
=
_cloneTemplate
[
template_index
]
->
get_cache
();
LOG
(
WARNING
)
<<
"tid:"
<<
tid
<<
" clone caches done"
;
DBReloadableInferEngine
<
EngineCore
>::
_gpu_index
++
;
DBReloadableInferEngine
<
EngineCore
>::
_gpu_index
++
;
md
->
current_idx
=
next_idx
;
LOG
(
WARNING
)
<<
"core clone model succ, cur_idx["
<<
md
->
current_idx
<<
"]."
;
}
}
// switch current_idx
md
->
current_idx
=
next_idx
;
LOG
(
WARNING
)
<<
"["
<<
tid
<<
"] Reload clone model and cube cache done. switching to current_idx["
<<
next_idx
<<
"]"
;
return
0
;
return
0
;
}
}
...
@@ -645,6 +698,8 @@ class InferManager {
...
@@ -645,6 +698,8 @@ class InferManager {
const
char
*
file
,
const
char
*
file
,
std
::
shared_ptr
<
int
>
engine_index_ptr
);
std
::
shared_ptr
<
int
>
engine_index_ptr
);
int
set_taskexecutor_num
(
size_t
total_engine_num
);
int
thrd_initialize
();
int
thrd_initialize
();
int
thrd_clear
();
int
thrd_clear
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录