Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleClas
提交
5690456e
P
PaddleClas
项目概览
PaddlePaddle
/
PaddleClas
大约 1 年 前同步成功
通知
115
Star
4999
Fork
1114
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
6
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleClas
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
6
合并请求
6
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5690456e
编写于
9月 08, 2021
作者:
S
stephon
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support for binary index build and search
上级
a368e3eb
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
225 addition
and
20 deletion
+225
-20
deploy/configs/build_product_binary.yaml
deploy/configs/build_product_binary.yaml
+40
-0
deploy/configs/inference_product_binary.yaml
deploy/configs/inference_product_binary.yaml
+60
-0
deploy/python/build_gallery.py
deploy/python/build_gallery.py
+40
-14
deploy/python/postprocess.py
deploy/python/postprocess.py
+40
-0
deploy/python/predict_system.py
deploy/python/predict_system.py
+9
-2
ppcls/arch/gears/identity_head.py
ppcls/arch/gears/identity_head.py
+28
-2
ppcls/configs/Products/MV3_Large_1x_Aliproduct_DLBHC.yaml
ppcls/configs/Products/MV3_Large_1x_Aliproduct_DLBHC.yaml
+2
-1
ppcls/engine/engine.py
ppcls/engine/engine.py
+6
-1
未找到文件。
deploy/configs/build_product_binary.yaml
0 → 100644
浏览文件 @
5690456e
Global
:
#rec_inference_model_dir: "./models/product_ResNet50_vd_aliproduct_v1.0_infer"
rec_inference_model_dir
:
"
../inference"
batch_size
:
32
use_gpu
:
True
enable_mkldnn
:
True
cpu_num_threads
:
10
enable_benchmark
:
True
use_fp16
:
False
ir_optim
:
True
use_tensorrt
:
False
gpu_mem
:
8000
enable_profile
:
False
RecPreProcess
:
transform_ops
:
-
ResizeImage
:
size
:
224
-
NormalizeImage
:
scale
:
0.00392157
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
'
-
ToCHWImage
:
RecPostProcess
:
main_indicator
:
Binarize
Binarize
:
method
:
"
round"
# indexing engine config
IndexProcess
:
index_method
:
"
Flat"
# supported: HNSW32, Flat
index_dir
:
"
./recognition_demo_data_v1.1/gallery_product/index_binary"
image_root
:
"
./recognition_demo_data_v1.1/gallery_product/"
data_file
:
"
./recognition_demo_data_v1.1/gallery_product/data_file.txt"
index_operation
:
"
new"
# suported: "append", "remove", "new"
delimiter
:
"
\t
"
dist_type
:
"
hamming"
embedding_size
:
512
deploy/configs/inference_product_binary.yaml
0 → 100644
浏览文件 @
5690456e
Global
:
infer_imgs
:
"
./recognition_demo_data_v1.1/test_product/daoxiangcunjinzhubing_6.jpg"
det_inference_model_dir
:
"
./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer"
rec_inference_model_dir
:
"
./models/product_ResNet50_vd_aliproduct_v1.0_infer"
rec_nms_thresold
:
0.05
batch_size
:
1
image_shape
:
[
3
,
640
,
640
]
threshold
:
0.2
max_det_results
:
5
labe_list
:
-
foreground
# inference engine config
use_gpu
:
True
enable_mkldnn
:
True
cpu_num_threads
:
10
enable_benchmark
:
True
use_fp16
:
False
ir_optim
:
True
use_tensorrt
:
False
gpu_mem
:
8000
enable_profile
:
False
DetPreProcess
:
transform_ops
:
-
DetResize
:
interp
:
2
keep_ratio
:
false
target_size
:
[
640
,
640
]
-
DetNormalizeImage
:
is_scale
:
true
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
-
DetPermute
:
{}
DetPostProcess
:
{}
RecPreProcess
:
transform_ops
:
-
ResizeImage
:
size
:
224
-
NormalizeImage
:
scale
:
0.00392157
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
'
-
ToCHWImage
:
RecPostProcess
:
main_indicator
:
Binarize
Binarize
:
method
:
"
round"
# indexing engine config
IndexProcess
:
binary_index
:
true
index_dir
:
"
./recognition_demo_data_v1.1/gallery_product/index_binary"
return_k
:
5
score_thres
:
0
deploy/python/build_gallery.py
浏览文件 @
5690456e
...
...
@@ -28,7 +28,6 @@ from python.predict_rec import RecPredictor
from
utils
import
logger
from
utils
import
config
def
split_datafile
(
data_file
,
image_root
,
delimiter
=
"
\t
"
):
'''
data_file: image path and info, which can be splitted by spacer
...
...
@@ -70,8 +69,8 @@ class GalleryBuilder(object):
# when remove data in index, do not need extract fatures
if
operation_method
!=
"remove"
:
gallery_features
=
self
.
_extract_features
(
gallery_images
,
config
)
gallery_features
=
self
.
_extract_features
(
gallery_images
,
config
)
#76 * 512
assert
operation_method
in
[
"new"
,
"remove"
,
"append"
],
"Only append, remove and new operation are supported"
...
...
@@ -104,11 +103,22 @@ class GalleryBuilder(object):
if
index_method
==
"IVF"
:
index_method
=
index_method
+
str
(
min
(
int
(
len
(
gallery_images
)
//
8
),
65536
))
+
",Flat"
# for binary index, add B at head of index_method
if
config
[
"dist_type"
]
==
"hamming"
:
index_method
=
"B"
+
index_method
#dist_type
dist_type
=
faiss
.
METRIC_INNER_PRODUCT
if
config
[
"dist_type"
]
==
"IP"
else
faiss
.
METRIC_L2
index
=
faiss
.
index_factory
(
config
[
"embedding_size"
],
index_method
,
dist_type
)
index
=
faiss
.
IndexIDMap2
(
index
)
#build index
if
config
[
"dist_type"
]
==
"hamming"
:
index
=
faiss
.
index_binary_factory
(
config
[
"embedding_size"
],
index_method
)
else
:
index
=
faiss
.
index_factory
(
config
[
"embedding_size"
],
index_method
,
dist_type
)
index
=
faiss
.
IndexIDMap2
(
index
)
ids
=
{}
if
config
[
"index_method"
]
==
"HNSW32"
:
...
...
@@ -119,12 +129,17 @@ class GalleryBuilder(object):
# calculate id for new data
start_id
=
max
(
ids
.
keys
())
+
1
if
ids
else
0
ids_now
=
(
np
.
arange
(
0
,
len
(
gallery_images
))
+
start_id
).
astype
(
np
.
int64
)
np
.
arange
(
0
,
len
(
gallery_images
))
+
start_id
).
astype
(
np
.
int64
)
#ids: just the number sequence
# only train when new index file
if
operation_method
==
"new"
:
index
.
train
(
gallery_features
)
index
.
add_with_ids
(
gallery_features
,
ids_now
)
if
config
[
"dist_type"
]
==
"hamming"
:
index
.
add
(
gallery_features
)
else
:
index
.
train
(
gallery_features
)
if
not
config
[
"dist_type"
]
==
"hamming"
:
index
.
add_with_ids
(
gallery_features
,
ids_now
)
for
i
,
d
in
zip
(
list
(
ids_now
),
gallery_docs
):
ids
[
i
]
=
d
...
...
@@ -142,15 +157,25 @@ class GalleryBuilder(object):
del
ids
[
k
]
# store faiss index file and id_map file
faiss
.
write_index
(
index
,
os
.
path
.
join
(
config
[
"index_dir"
],
"vector.index"
))
if
config
[
"dist_type"
]
==
"hamming"
:
faiss
.
write_index_binary
(
index
,
os
.
path
.
join
(
config
[
"index_dir"
],
"vector.index"
))
else
:
faiss
.
write_index
(
index
,
os
.
path
.
join
(
config
[
"index_dir"
],
"vector.index"
))
with
open
(
os
.
path
.
join
(
config
[
"index_dir"
],
"id_map.pkl"
),
'wb'
)
as
fd
:
pickle
.
dump
(
ids
,
fd
)
def
_extract_features
(
self
,
gallery_images
,
config
):
# extract gallery features
gallery_features
=
np
.
zeros
(
[
len
(
gallery_images
),
config
[
'embedding_size'
]],
dtype
=
np
.
float32
)
if
config
[
"dist_type"
]
==
"hamming"
:
gallery_features
=
np
.
zeros
(
[
len
(
gallery_images
),
config
[
'embedding_size'
]
//
8
],
dtype
=
np
.
uint8
)
else
:
gallery_features
=
np
.
zeros
(
[
len
(
gallery_images
),
config
[
'embedding_size'
]],
dtype
=
np
.
float32
)
#construct batch imgs and do inference
batch_size
=
config
.
get
(
"batch_size"
,
32
)
...
...
@@ -164,7 +189,7 @@ class GalleryBuilder(object):
batch_img
.
append
(
img
)
if
(
i
+
1
)
%
batch_size
==
0
:
rec_feat
=
self
.
rec_predictor
.
predict
(
batch_img
)
rec_feat
=
self
.
rec_predictor
.
predict
(
batch_img
)
#32 * 512
gallery_features
[
i
-
batch_size
+
1
:
i
+
1
,
:]
=
rec_feat
batch_img
=
[]
...
...
@@ -172,6 +197,7 @@ class GalleryBuilder(object):
rec_feat
=
self
.
rec_predictor
.
predict
(
batch_img
)
gallery_features
[
-
len
(
batch_img
):,
:]
=
rec_feat
batch_img
=
[]
return
gallery_features
...
...
deploy/python/postprocess.py
浏览文件 @
5690456e
...
...
@@ -62,6 +62,7 @@ class Topk(object):
def
parse_class_id_map
(
self
,
class_id_map_file
):
if
class_id_map_file
is
None
:
return
None
if
not
os
.
path
.
exists
(
class_id_map_file
):
print
(
"Warning: If want to use your own label_dict, please input legal path!
\n
Otherwise label_names will be empty!"
...
...
@@ -126,3 +127,42 @@ class SavePreLabel(object):
output_dir
=
self
.
save_dir
(
str
(
id
))
os
.
makedirs
(
output_dir
,
exist_ok
=
True
)
shutil
.
copy
(
image_file
,
output_dir
)
class
Binarize
(
object
):
def
__init__
(
self
,
method
=
"round"
):
self
.
method
=
method
self
.
unit
=
np
.
array
([[
128
,
64
,
32
,
16
,
8
,
4
,
2
,
1
]]).
T
def
__call__
(
self
,
x
,
file_names
=
None
):
if
self
.
method
==
"round"
:
x
=
np
.
round
(
x
+
1
).
astype
(
"uint8"
)
-
1
if
self
.
method
==
"sign"
:
x
=
((
np
.
sign
(
x
)
+
1
)
/
2
).
astype
(
"uint8"
)
embedding_size
=
x
.
shape
[
1
]
assert
embedding_size
%
8
==
0
,
"The Binary index only support vectors with sizes multiple of 8"
byte
=
np
.
zeros
([
x
.
shape
[
0
],
embedding_size
//
8
],
dtype
=
np
.
uint8
)
for
i
in
range
(
embedding_size
//
8
):
byte
[:,
i
:
i
+
1
]
=
np
.
dot
(
x
[:,
i
*
8
:
(
i
+
1
)
*
8
],
self
.
unit
)
return
byte
if
__name__
==
"__main__"
:
a
=
Binarize
()
x
=
np
.
random
.
random
((
31
,
64
)).
astype
(
'float32'
)
y
=
a
(
x
)
print
(
y
)
print
(
y
.
shape
)
deploy/python/predict_system.py
浏览文件 @
5690456e
...
...
@@ -47,8 +47,14 @@ class SystemPredictor(object):
index_dir
,
"vector.index"
)),
"vector.index not found ..."
assert
os
.
path
.
exists
(
os
.
path
.
join
(
index_dir
,
"id_map.pkl"
)),
"id_map.pkl not found ... "
self
.
Searcher
=
faiss
.
read_index
(
os
.
path
.
join
(
index_dir
,
"vector.index"
))
if
config
[
'IndexProcess'
].
get
(
"binary_index"
,
False
):
self
.
Searcher
=
faiss
.
read_index_binary
(
os
.
path
.
join
(
index_dir
,
"vector.index"
))
else
:
self
.
Searcher
=
faiss
.
read_index
(
os
.
path
.
join
(
index_dir
,
"vector.index"
))
with
open
(
os
.
path
.
join
(
index_dir
,
"id_map.pkl"
),
"rb"
)
as
fd
:
self
.
id_map
=
pickle
.
load
(
fd
)
...
...
@@ -105,6 +111,7 @@ class SystemPredictor(object):
rec_results
=
self
.
rec_predictor
.
predict
(
crop_img
)
preds
[
"bbox"
]
=
[
xmin
,
ymin
,
xmax
,
ymax
]
scores
,
docs
=
self
.
Searcher
.
search
(
rec_results
,
self
.
return_k
)
# just top-1 result will be returned for the final
if
scores
[
0
][
0
]
>=
self
.
config
[
"IndexProcess"
][
"score_thres"
]:
preds
[
"rec_docs"
]
=
self
.
id_map
[
docs
[
0
][
0
]].
split
()[
1
]
...
...
ppcls/arch/gears/identity_head.py
浏览文件 @
5690456e
from
paddle
import
nn
import
paddle
class
IdentityHead
(
nn
.
Layer
):
def
__init__
(
self
):
def
__init__
(
self
,
binarize_method
=
"none"
,
embedding_size
=
256
):
super
(
IdentityHead
,
self
).
__init__
()
self
.
binarize_method
=
binarize_method
self
.
embedding_size
=
embedding_size
self
.
multiplier
=
self
.
_init_multiplier
(
embedding_size
)
def
forward
(
self
,
x
,
label
=
None
):
if
self
.
binarize_method
==
"round"
:
x
=
paddle
.
round
(
x
)
if
self
.
binarize_method
==
"sign"
:
x
=
(
paddle
.
sign
(
x
)
+
1.0
)
/
2.0
if
self
.
binarize_method
==
"round"
or
self
.
binarize_method
==
"sign"
:
x
=
self
.
_binary_to_byte
(
x
,
self
.
multiplier
)
return
{
"features"
:
x
,
"logits"
:
None
}
def
_init_multiplier
(
self
,
embedding_size
):
unit
=
paddle
.
to_tensor
([
128
,
64
,
32
,
16
,
8
,
4
,
2
,
1
])
repeat
=
embedding_size
//
8
assert
embedding_size
%
8
==
0
,
"The binary index only support vectors with sizes multiple of 8"
unit
=
paddle
.
broadcast_to
(
unit
,
shape
=
[
repeat
,
8
])
multiplier
=
paddle
.
reshape
(
unit
,
shape
=
[
1
,
-
1
]).
astype
(
"float32"
)
return
multiplier
def
_binary_to_byte
(
self
,
input_tensor
,
multiplier
):
tmp
=
paddle
.
multiply
(
input_tensor
,
multiplier
)
tmp
=
paddle
.
reshape
(
tmp
,
shape
=
[
tmp
.
shape
[
0
],
-
1
,
8
])
byte
=
paddle
.
sum
(
tmp
,
axis
=-
1
).
astype
(
"uint8"
)
return
byte
\ No newline at end of file
ppcls/configs/Products/MV3_Large_1x_Aliproduct_DLBHC.yaml
浏览文件 @
5690456e
# global configs
Global
:
checkpoints
:
null
pretrained_model
:
nul
l
pretrained_model
:
./output_product_binary/best_mode
l
output_dir
:
./output_dlbhc/
device
:
gpu
save_interval
:
1
...
...
@@ -34,6 +34,7 @@ Arch:
infer_output_key
:
"
features"
infer_add_softmax
:
"
false"
infer_binarize
:
"
round"
# loss function config for train/eval process
Loss
:
...
...
ppcls/engine/engine.py
浏览文件 @
5690456e
...
...
@@ -378,7 +378,9 @@ class ExportModel(nn.Layer):
self
.
infer_output_key
=
config
.
get
(
"infer_output_key"
,
None
)
if
self
.
infer_output_key
==
"features"
and
isinstance
(
self
.
base_model
,
RecModel
):
self
.
base_model
.
head
=
IdentityHead
()
embedding_size
=
config
[
"Head"
][
"embedding_size"
]
self
.
base_model
.
head
=
IdentityHead
(
config
.
get
(
"infer_binarize"
,
"none"
),
embedding_size
)
if
config
.
get
(
"infer_add_softmax"
,
True
):
self
.
softmax
=
nn
.
Softmax
(
axis
=-
1
)
else
:
...
...
@@ -394,10 +396,13 @@ class ExportModel(nn.Layer):
x
=
self
.
base_model
(
x
)
if
isinstance
(
x
,
list
):
x
=
x
[
0
]
if
self
.
infer_model_name
is
not
None
:
x
=
x
[
self
.
infer_model_name
]
if
self
.
infer_output_key
is
not
None
:
x
=
x
[
self
.
infer_output_key
]
if
self
.
softmax
is
not
None
:
x
=
self
.
softmax
(
x
)
return
x
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录