PaddlePaddle / PaddleClas

Commit 6cd62518 (unverified)
Authored on Aug 29, 2022 by Walter; committed via GitHub on Aug 29, 2022
Parents: 3a4c7861, 52ba23c8

Merge pull request #2236 from RainFrost1/lite_shitu

update build_gallery and add android demo index support

Showing 1 changed file with 119 additions and 81 deletions

deploy/python/build_gallery.py  (+119, -81)
@@ -12,16 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+import pickle
 
 import cv2
 import faiss
 import numpy as np
-from tqdm import tqdm
-import pickle
-
-from paddleclas.deploy.utils import logger, config
-from paddleclas.deploy.python.predict_rec import RecPredictor
+from paddleclas.deploy.python.predict_rec import RecPredictor
+from paddleclas.deploy.utils import config, logger
+from tqdm import tqdm
 
 
 def split_datafile(data_file, image_root, delimiter="\t"):
@@ -52,6 +50,7 @@ class GalleryBuilder(object):
         self.config = config
         self.rec_predictor = RecPredictor(config)
         assert 'IndexProcess' in config.keys(), "Index config not found ... "
+        self.android_demo = config["Global"].get("android_demo", False)
         self.build(config['IndexProcess'])
 
     def build(self, config):
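
The constructor now reads an android_demo switch from the Global section of the deploy config, defaulting to False, and still passes the IndexProcess section to build(). Below is a minimal sketch of the config shape this expects; the key names come from the diff, while the concrete values and paths are illustrative placeholders rather than the project's shipped defaults:

    # Illustrative config shape only; the real tool loads a deploy YAML via
    # paddleclas.deploy.utils.config and also needs a recognition model for
    # RecPredictor.
    config = {
        "Global": {
            "android_demo": True,  # switches build() to the flat-index demo path
        },
        "IndexProcess": {
            "index_dir": "./index",        # placeholder output directory
            "index_method": "HNSW32",
            "index_operation": "new",
            "dist_type": "IP",
            "embedding_size": 512,         # placeholder feature dimension
        },
    }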
@@ -70,98 +69,50 @@ class GalleryBuilder(object):
             "new", "remove", "append"
         ], "Only append, remove and new operation are supported"
 
+        if self.android_demo:
+            self._create_index_for_android_demo(config, gallery_features,
+                                                gallery_docs)
+            return
+
         # vector.index: faiss index file
         # id_map.pkl: use this file to map id to image_doc
+        index, ids = None, None
         if operation_method in ["remove", "append"]:
-            # if remove or append, vector.index and id_map.pkl must exist
-            assert os.path.join(
-                config["index_dir"], "vector.index"
-            ), "The vector.index dose not exist in {} when 'index_operation' is not None".format(
-                config["index_dir"])
-            assert os.path.join(
-                config["index_dir"], "id_map.pkl"
-            ), "The id_map.pkl dose not exist in {} when 'index_operation' is not None".format(
-                config["index_dir"])
-            index = faiss.read_index(
-                os.path.join(config["index_dir"], "vector.index"))
-            with open(os.path.join(config["index_dir"], "id_map.pkl"),
-                      'rb') as fd:
-                ids = pickle.load(fd)
-            assert index.ntotal == len(ids.keys(
-            )), "data number in index is not equal in in id_map"
-        else:
-            if not os.path.exists(config["index_dir"]):
-                os.makedirs(config["index_dir"], exist_ok=True)
-
+            # if remove or append, load vector.index and id_map.pkl
+            index, ids = self._load_index(config)
             index_method = config.get("index_method", "HNSW32")
-
-            # if IVF method, cal ivf number automaticlly
-            if index_method == "IVF":
-                index_method = index_method + str(
-                    min(int(len(gallery_images) // 8), 65536)) + ",Flat"
-
-            # for binary index, add B at head of index_method
-            if config["dist_type"] == "hamming":
-                index_method = "B" + index_method
-
-            #dist_type
-            dist_type = faiss.METRIC_INNER_PRODUCT if config[
-                "dist_type"] == "IP" else faiss.METRIC_L2
-
-            #build index
-            if config["dist_type"] == "hamming":
-                index = faiss.index_binary_factory(config["embedding_size"],
-                                                   index_method)
-            else:
-                index = faiss.index_factory(config["embedding_size"],
-                                            index_method, dist_type)
-                index = faiss.IndexIDMap2(index)
-            ids = {}
-
-        if config["index_method"] == "HNSW32":
+        else:
+            index_method, index, ids = self._create_index(config)
+        if index_method == "HNSW32":
             logger.warning(
                 "The HNSW32 method dose not support 'remove' operation")
 
         if operation_method != "remove":
-            # calculate id for new data
-            start_id = max(ids.keys()) + 1 if ids else 0
-            ids_now = (
-                np.arange(0, len(gallery_images)) + start_id).astype(np.int64)
-
-            # only train when new index file
-            if operation_method == "new":
-                if config["dist_type"] == "hamming":
-                    index.add(gallery_features)
-                else:
-                    index.train(gallery_features)
-
-            if not config["dist_type"] == "hamming":
-                index.add_with_ids(gallery_features, ids_now)
-
-            for i, d in zip(list(ids_now), gallery_docs):
-                ids[i] = d
+            index, ids = self._add_gallery(index, ids, gallery_features,
+                                           gallery_docs, config,
+                                           operation_method)
         else:
-            if config["index_method"] == "HNSW32":
+            if index_method == "HNSW32":
                 raise RuntimeError(
                     "The index_method: HNSW32 dose not support 'remove' operation"
                 )
-            # remove ids in id_map, remove index data in faiss index
-            remove_ids = list(
-                filter(lambda k: ids.get(k) in gallery_docs, ids.keys()))
-            remove_ids = np.asarray(remove_ids)
-            index.remove_ids(remove_ids)
-            for k in remove_ids:
-                del ids[k]
-
-        # store faiss index file and id_map file
-        if config["dist_type"] == "hamming":
-            faiss.write_index_binary(
-                index, os.path.join(config["index_dir"], "vector.index"))
-        else:
-            faiss.write_index(
-                index, os.path.join(config["index_dir"], "vector.index"))
-
-        with open(os.path.join(config["index_dir"], "id_map.pkl"), 'wb') as fd:
-            pickle.dump(ids, fd)
+            index, ids = self._rm_id_in_galllery(index, ids, gallery_docs)
+
+        self._save_gallery(config, index, ids)
+
+    def _create_index_for_android_demo(self, config, gallery_features,
+                                       gallery_docs):
+        if not os.path.exists(config["index_dir"]):
+            os.makedirs(config["index_dir"], exist_ok=True)
+        #build index
+        index = faiss.IndexFlatIP(config["embedding_size"])
+        index.add(gallery_features)
+
+        # calculate id for data
+        ids_now = (np.arange(0, len(gallery_docs))).astype(np.int64)
+        ids = {}
+        for i, d in zip(list(ids_now), gallery_docs):
+            ids[i] = d
+        self._save_gallery(config, index, ids)
 
     def _extract_features(self, gallery_images, config):
         # extract gallery features
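
On the Android demo path, build() returns early and writes a flat inner-product index plus the id-to-doc pickle, skipping the new/append/remove logic. Below is a standalone sketch of those steps with made-up data; the file names, index type, and dtypes follow the diff, while the dimension, directory, docs, and random features are placeholders:

    import os
    import pickle

    import faiss
    import numpy as np

    embedding_size = 512                  # placeholder dimension
    index_dir = "./index"                 # placeholder output directory
    gallery_docs = ["gallery/0.jpg label_a", "gallery/1.jpg label_b"]  # placeholders
    gallery_features = np.random.rand(
        len(gallery_docs), embedding_size).astype(np.float32)

    os.makedirs(index_dir, exist_ok=True)

    # flat inner-product index, as in _create_index_for_android_demo
    index = faiss.IndexFlatIP(embedding_size)
    index.add(gallery_features)

    # sequential ids mapped to their gallery docs
    ids = {i: d for i, d in enumerate(gallery_docs)}

    # persist the same two artifacts _save_gallery writes
    faiss.write_index(index, os.path.join(index_dir, "vector.index"))
    with open(os.path.join(index_dir, "id_map.pkl"), "wb") as fd:
        pickle.dump(ids, fd)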
@@ -197,6 +148,93 @@ class GalleryBuilder(object):
 
         return gallery_features
 
+    def _load_index(self, config):
+        assert os.path.join(
+            config["index_dir"], "vector.index"
+        ), "The vector.index dose not exist in {} when 'index_operation' is not None".format(
+            config["index_dir"])
+        assert os.path.join(
+            config["index_dir"], "id_map.pkl"
+        ), "The id_map.pkl dose not exist in {} when 'index_operation' is not None".format(
+            config["index_dir"])
+        index = faiss.read_index(
+            os.path.join(config["index_dir"], "vector.index"))
+        with open(os.path.join(config["index_dir"], "id_map.pkl"), 'rb') as fd:
+            ids = pickle.load(fd)
+        assert index.ntotal == len(ids.keys(
+        )), "data number in index is not equal in in id_map"
+        return index, ids
+
+    def _create_index(self, config):
+        if not os.path.exists(config["index_dir"]):
+            os.makedirs(config["index_dir"], exist_ok=True)
+        index_method = config.get("index_method", "HNSW32")
+
+        # if IVF method, cal ivf number automaticlly
+        if index_method == "IVF":
+            index_method = index_method + str(
+                min(int(len(gallery_images) // 8), 65536)) + ",Flat"
+
+        # for binary index, add B at head of index_method
+        if config["dist_type"] == "hamming":
+            index_method = "B" + index_method
+
+        #dist_type
+        dist_type = faiss.METRIC_INNER_PRODUCT if config[
+            "dist_type"] == "IP" else faiss.METRIC_L2
+
+        #build index
+        if config["dist_type"] == "hamming":
+            index = faiss.index_binary_factory(config["embedding_size"],
+                                               index_method)
+        else:
+            index = faiss.index_factory(config["embedding_size"],
+                                        index_method, dist_type)
+            index = faiss.IndexIDMap2(index)
+        ids = {}
+        return index_method, index, ids
+
+    def _add_gallery(self, index, ids, gallery_features, gallery_docs, config,
+                     operation_method):
+        start_id = max(ids.keys()) + 1 if ids else 0
+        ids_now = (
+            np.arange(0, len(gallery_docs)) + start_id).astype(np.int64)
+
+        # only train when new index file
+        if operation_method == "new":
+            if config["dist_type"] == "hamming":
+                index.add(gallery_features)
+            else:
+                index.train(gallery_features)
+
+        if not config["dist_type"] == "hamming":
+            index.add_with_ids(gallery_features, ids_now)
+
+        for i, d in zip(list(ids_now), gallery_docs):
+            ids[i] = d
+        return index, ids
+
+    def _rm_id_in_galllery(self, index, ids, gallery_docs):
+        remove_ids = list(
+            filter(lambda k: ids.get(k) in gallery_docs, ids.keys()))
+        remove_ids = np.asarray(remove_ids)
+        index.remove_ids(remove_ids)
+        for k in remove_ids:
+            del ids[k]
+        return index, ids
+
+    def _save_gallery(self, config, index, ids):
+        if config["dist_type"] == "hamming":
+            faiss.write_index_binary(
+                index, os.path.join(config["index_dir"], "vector.index"))
+        else:
+            faiss.write_index(
+                index, os.path.join(config["index_dir"], "vector.index"))
+
+        with open(os.path.join(config["index_dir"], "id_map.pkl"), 'wb') as fd:
+            pickle.dump(ids, fd)
+
 
 def main(config):
     GalleryBuilder(config)
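
For the regular (non-demo) path, the extracted helpers keep the same on-disk artifacts (vector.index and id_map.pkl) while separating create, add, remove, and save. Below is a rough end-to-end sketch of the non-binary flow that _create_index and _add_gallery implement: build a factory index, wrap it for explicit ids, add vectors, then query. The dimension, data, and query are placeholders:

    import faiss
    import numpy as np

    embedding_size = 128                                   # placeholder dimension
    features = np.random.rand(10, embedding_size).astype(np.float32)

    # _create_index: factory-built index wrapped so ids can be assigned explicitly
    index = faiss.index_factory(embedding_size, "HNSW32",
                                faiss.METRIC_INNER_PRODUCT)
    index = faiss.IndexIDMap2(index)

    # _add_gallery: train (a no-op for HNSW/Flat, required for IVF), then add
    # vectors under sequential int64 ids
    ids_now = np.arange(0, len(features)).astype(np.int64)
    index.train(features)
    index.add_with_ids(features, ids_now)

    # query the gallery: top-3 neighbours of the first vector
    scores, neighbours = index.search(features[:1], 3)
    print(neighbours[0], scores[0])

Note that, as the warning in the diff states, an HNSW32 index does not support the 'remove' operation; remove_ids only works for index types that allow deletion.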