未验证 提交 e1f13ed9 编写于 作者: L littletomatodonkey 提交者: GitHub

update rec-process and quick start doc (#922)

* add nms to rec results

* add nms to rec results

* fix quick start demo and config files

* fix link and typo
上级 1925b2b3
...@@ -28,9 +28,9 @@ RecPostProcess: null ...@@ -28,9 +28,9 @@ RecPostProcess: null
# indexing engine config # indexing engine config
IndexProcess: IndexProcess:
index_path: "./dataset/cartoon_demo_data_v1.0/index/" index_path: "./recognition_demo_data_v1.0/gallery_cartoon/index/"
image_root: "./dataset/cartoon_demo_data_v1.0/" image_root: "./recognition_demo_data_v1.0/gallery_cartoon/"
data_file: "./dataset/cartoon_demo_data_v1.0/data_file.txt" data_file: "./recognition_demo_data_v1.0/gallery_cartoon/data_file.txt"
delimiter: "\t" delimiter: "\t"
dist_type: "IP" dist_type: "IP"
pq_size: 100 pq_size: 100
......
...@@ -26,9 +26,9 @@ RecPostProcess: null ...@@ -26,9 +26,9 @@ RecPostProcess: null
# indexing engine config # indexing engine config
IndexProcess: IndexProcess:
index_path: "./dataset/logo_demo_data_v1.0/index/" index_path: "./recognition_demo_data_v1.0/gallery_logo/index/"
image_root: "./dataset/logo_demo_data_v1.0/" image_root: "./recognition_demo_data_v1.0/gallery_logo/"
data_file: "./dataset/logo_demo_data_v1.0/data_file.txt" data_file: "./recognition_demo_data_v1.0/gallery_logo/data_file.txt"
delimiter: "\t" delimiter: "\t"
dist_type: "IP" dist_type: "IP"
pq_size: 100 pq_size: 100
......
...@@ -26,10 +26,10 @@ RecPostProcess: null ...@@ -26,10 +26,10 @@ RecPostProcess: null
# indexing engine config # indexing engine config
IndexProcess: IndexProcess:
index_path: "./dataset/product_demo_data_v1.0/index" index_path: "./recognition_demo_data_v1.0/gallery_product/index"
image_root: "./dataset/product_demo_data_v1.0" image_root: "./recognition_demo_data_v1.0/gallery_product/"
data_file: "./dataset/product_demo_data_v1.0/data_file.txt" data_file: "./recognition_demo_data_v1.0/gallery_product/data_file.txt"
delimiter: " " delimiter: "\t"
dist_type: "IP" dist_type: "IP"
pq_size: 100 pq_size: 100
embedding_size: 512 embedding_size: 512
...@@ -26,10 +26,10 @@ RecPostProcess: null ...@@ -26,10 +26,10 @@ RecPostProcess: null
# indexing engine config # indexing engine config
IndexProcess: IndexProcess:
index_path: "./dataset/vehicle_demo_data_v1.0/index/" index_path: "./recognition_demo_data_v1.0/gallery_vehicle/index/"
image_root: "./dataset/vehicle_demo_data_v1.0/" image_root: "./recognition_demo_data_v1.0/gallery_vehicle/"
data_file: "./dataset/vehicle_demo_data_v1.0/data_file.txt" data_file: "./recognition_demo_data_v1.0/gallery_vehicle/data_file.txt"
delimiter: " " delimiter: "\t"
dist_type: "IP" dist_type: "IP"
pq_size: 100 pq_size: 100
embedding_size: 512 embedding_size: 512
Global: Global:
infer_imgs: "./dataset/cartoon_demo_data_v1.0/query/" infer_imgs: "./recognition_demo_data_v1.0/test_cartoon"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/"
rec_inference_model_dir: "./models/cartoon_rec_ResNet50_iCartoon_v1.0_infer/" rec_inference_model_dir: "./models/cartoon_rec_ResNet50_iCartoon_v1.0_infer/"
rec_nms_thresold: 0.1
batch_size: 1 batch_size: 1
image_shape: [3, 640, 640] image_shape: [3, 640, 640]
threshold: 0.2 threshold: 0.2
...@@ -49,7 +51,7 @@ RecPreProcess: ...@@ -49,7 +51,7 @@ RecPreProcess:
RecPostProcess: null RecPostProcess: null
IndexProcess: IndexProcess:
index_path: "./dataset/cartoon_demo_data_v1.0/index/" index_path: "./recognition_demo_data_v1.0/gallery_cartoon/index/"
search_budget: 100 search_budget: 100
return_k: 5 return_k: 5
dist_type: "IP" dist_type: "IP"
......
Global: Global:
infer_imgs: "./dataset/logo_demo_data_v1.0/query/logo_auxx-1.jpg" infer_imgs: "./recognition_demo_data_v1.0/test_logo"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/"
rec_inference_model_dir: "./models/logo_rec_ResNet50_Logo3K_v1.0_infer/" rec_inference_model_dir: "./models/logo_rec_ResNet50_Logo3K_v1.0_infer/"
rec_nms_thresold: 0.3
batch_size: 1 batch_size: 1
image_shape: [3, 640, 640] image_shape: [3, 640, 640]
threshold: 0.2 threshold: 0.2
...@@ -48,7 +50,7 @@ RecPostProcess: null ...@@ -48,7 +50,7 @@ RecPostProcess: null
# indexing engine config # indexing engine config
IndexProcess: IndexProcess:
index_path: "./dataset/logo_demo_data_v1.0/index/" index_path: "./recognition_demo_data_v1.0/gallery_logo/index/"
search_budget: 100 search_budget: 100
return_k: 5 return_k: 5
dist_type: "IP" dist_type: "IP"
......
Global: Global:
infer_imgs: "./dataset/product_demo_data_v1.0/query/wangzai.jpg" infer_imgs: "./recognition_demo_data_v1.0/test_product/daoxiangcunjinzhubing_6.jpg"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer"
rec_inference_model_dir: "./models/product_ResNet50_vd_aliproduct_v1.0_infer" rec_inference_model_dir: "./models/product_ResNet50_vd_aliproduct_v1.0_infer"
rec_nms_thresold: 0.3
batch_size: 1 batch_size: 1
image_shape: [3, 640, 640] image_shape: [3, 640, 640]
threshold: 0.2 threshold: 0.2
...@@ -48,7 +50,7 @@ RecPostProcess: null ...@@ -48,7 +50,7 @@ RecPostProcess: null
# indexing engine config # indexing engine config
IndexProcess: IndexProcess:
index_path: "./dataset/product_demo_data_v1.0/index" index_path: "./recognition_demo_data_v1.0/gallery_product/index"
search_budget: 100 search_budget: 100
return_k: 5 return_k: 5
dist_type: "IP" dist_type: "IP"
......
Global: Global:
infer_imgs: "./dataset/vehicle_demo_data_v1.0/query/" infer_imgs: "./recognition_demo_data_v1.0/test_vehicle/"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/"
rec_inference_model_dir: "./models/vehicle_cls_ResNet50_CompCars_v1.0_infer/" rec_inference_model_dir: "./models/vehicle_cls_ResNet50_CompCars_v1.0_infer/"
rec_nms_thresold: 0.3
batch_size: 1 batch_size: 1
image_shape: [3, 640, 640] image_shape: [3, 640, 640]
threshold: 0.2 threshold: 0.2
...@@ -50,7 +52,7 @@ RecPostProcess: null ...@@ -50,7 +52,7 @@ RecPostProcess: null
# indexing engine config # indexing engine config
IndexProcess: IndexProcess:
index_path: "./dataset/vehicle_demo_data_v1.0/index/" index_path: "./recognition_demo_data_v1.0/gallery_vehicle/index/"
search_budget: 100 search_budget: 100
return_k: 5 return_k: 5
dist_type: "IP" dist_type: "IP"
......
...@@ -39,9 +39,12 @@ def split_datafile(data_file, image_root, delimiter="\t"): ...@@ -39,9 +39,12 @@ def split_datafile(data_file, image_root, delimiter="\t"):
gallery_docs = [] gallery_docs = []
with open(data_file, 'r', encoding='utf-8') as f: with open(data_file, 'r', encoding='utf-8') as f:
lines = f.readlines() lines = f.readlines()
for i, line in enumerate(lines): for _, ori_line in enumerate(lines):
line = line.strip().split(delimiter) line = ori_line.strip().split(delimiter)
text_num = len(line)
assert text_num >= 2, f"line({ori_line}) must be splitted into at least 2 parts, but got {text_num}"
image_file = os.path.join(image_root, line[0]) image_file = os.path.join(image_root, line[0])
image_doc = line[1] image_doc = line[1]
gallery_images.append(image_file) gallery_images.append(image_file)
gallery_docs.append(image_doc) gallery_docs.append(image_doc)
......
...@@ -50,17 +50,51 @@ class SystemPredictor(object): ...@@ -50,17 +50,51 @@ class SystemPredictor(object):
results.append({ results.append({
"class_id": 0, "class_id": 0,
"score": 1.0, "score": 1.0,
"bbox": np.array([0, 0, shape[1], shape[0]]), "bbox":
np.array([0, 0, shape[1], shape[0]]), # xmin, ymin, xmax, ymax
"label_name": "foreground", "label_name": "foreground",
}) })
return results return results
def nms_to_rec_results(self, results, thresh=0.3):
    """Greedy non-maximum suppression over recognition results.

    Results are visited from the highest "rec_scores" to the lowest; a
    result is dropped when its IoU with an already kept result is greater
    than ``thresh``.

    Args:
        results: list of dicts; each must provide "bbox" indexable as
            [xmin, ymin, xmax, ymax] and a scalar "rec_scores".
        thresh: IoU value above which the lower-scoring result is removed.

    Returns:
        The kept dicts (same objects as in ``results``), ordered by
        descending "rec_scores".
    """
    filtered_results = []
    x1 = np.array([r["bbox"][0] for r in results]).astype("float32")
    y1 = np.array([r["bbox"][1] for r in results]).astype("float32")
    x2 = np.array([r["bbox"][2] for r in results]).astype("float32")
    y2 = np.array([r["bbox"][3] for r in results]).astype("float32")
    scores = np.array([r["rec_scores"] for r in results])

    # The +1 counts both boundary coordinates as part of the box.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    while order.size > 0:
        i = order[0]
        rest = order[1:]
        # Intersection rectangle: the larger of the two mins and the
        # smaller of the two maxes on BOTH axes.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # Clamp at zero so disjoint boxes contribute no intersection.
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        # Indices in `ovr` are relative to `rest`, hence the +1 shift
        # when mapping back into `order`.
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]
        filtered_results.append(results[i])
    return filtered_results
def predict(self, img): def predict(self, img):
output = [] output = []
# st1: get all detection results
results = self.det_predictor.predict(img) results = self.det_predictor.predict(img)
# add the whole image for recognition
# st2: add the whole image for recognition to improve recall
results = self.append_self(results, img.shape) results = self.append_self(results, img.shape)
# st3: recognition process, use score_thres to ensure accuracy
for result in results: for result in results:
preds = {} preds = {}
xmin, ymin, xmax, ymax = result["bbox"].astype("int") xmin, ymin, xmax, ymax = result["bbox"].astype("int")
...@@ -75,11 +109,12 @@ class SystemPredictor(object): ...@@ -75,11 +109,12 @@ class SystemPredictor(object):
if scores[0] >= self.config["IndexProcess"]["score_thres"]: if scores[0] >= self.config["IndexProcess"]["score_thres"]:
preds["rec_docs"] = docs[0] preds["rec_docs"] = docs[0]
preds["rec_scores"] = scores[0] preds["rec_scores"] = scores[0]
else: output.append(preds)
preds["rec_docs"] = None
preds["rec_scores"] = 0.0 # st5: nms to the final results to avoid fetching duplicate results
output = self.nms_to_rec_results(
output, self.config["Global"]["rec_nms_thresold"])
output.append(preds)
return output return output
......
...@@ -34,13 +34,16 @@ ...@@ -34,13 +34,16 @@
检测模型与4个方向(Logo、动漫人物、车辆、商品)的识别inference模型、测试数据下载地址以及对应的配置文件地址如下。 检测模型与4个方向(Logo、动漫人物、车辆、商品)的识别inference模型、测试数据下载地址以及对应的配置文件地址如下。
| 模型简介 | 推荐场景 | 测试数据地址 | inference模型 | 预测配置文件 | 构建索引库的配置文件 | | 模型简介 | 推荐场景 | inference模型 | 预测配置文件 | 构建索引库的配置文件 |
| ------------ | ------------- | ------- | -------- | ------- | -------- | | ------------ | ------------- | -------- | ------- | -------- |
| 通用主体检测模型 | 通用场景 | - |[模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) | - | - | | 通用主体检测模型 | 通用场景 |[模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) | - | - |
| Logo识别模型 | Logo场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/logo_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) | [inference_logo.yaml](../../../deploy/configs/inference_logo.yaml) | [build_logo.yaml](../../../deploy/configs/build_logo.yaml) | | Logo识别模型 | Logo场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) | [inference_logo.yaml](../../../deploy/configs/inference_logo.yaml) | [build_logo.yaml](../../../deploy/configs/build_logo.yaml) |
| 动漫人物识别模型 | 动漫人物场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/cartoon_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) | | 动漫人物识别模型 | 动漫人物场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) |
| 车辆细分类模型 | 车辆场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/vehicle_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) | [inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) | | 车辆细分类模型 | 车辆场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) | [inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) |
| 商品识别模型 | 商品场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar) | [inference_product.yaml](../../../deploy/configs/inference_product.yaml) | [build_product.yaml](../../../deploy/configs/build_product.yaml) | | 商品识别模型 | 商品场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar) | [inference_product.yaml](../../../deploy/configs/inference_product.yaml) | [build_product.yaml](../../../deploy/configs/build_product.yaml) |
本章节demo数据下载地址如下: [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/recognition_demo_data_v1.0.tar)
**注意** **注意**
...@@ -60,32 +63,23 @@ brew install wget ...@@ -60,32 +63,23 @@ brew install wget
* 可以按照下面的命令下载并解压数据与模型 * 可以按照下面的命令下载并解压数据与模型
```shell ```shell
mkdir dataset
cd dataset
# 下载demo数据并解压
wget {数据下载链接地址} && tar -xf {压缩包的名称}
cd ..
mkdir models mkdir models
cd models cd models
# 下载识别inference模型并解压 # 下载识别inference模型并解压
wget {模型下载链接地址} && tar -xf {压缩包的名称} wget {模型下载链接地址} && tar -xf {压缩包的名称}
cd .. cd ..
# 下载demo数据并解压
wget {数据下载链接地址} && tar -xf {压缩包的名称}
``` ```
<a name="下载、解压inference_模型与demo数据"></a> <a name="下载、解压inference_模型与demo数据"></a>
### 2.1 下载、解压inference 模型与demo数据 ### 2.1 下载、解压inference 模型与demo数据
以商品识别为例,下载通用检测、识别模型以及商品识别demo数据,命令如下。 以商品识别为例,下载demo数据集以及通用检测、识别模型,命令如下。
```shell ```shell
mkdir dataset
cd dataset
# 下载demo数据并解压
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar && tar -xf product_demo_data_v1.0.tar
cd ..
mkdir models mkdir models
cd models cd models
# 下载通用检测inference模型并解压 # 下载通用检测inference模型并解压
...@@ -93,21 +87,27 @@ wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/infere ...@@ -93,21 +87,27 @@ wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/infere
# 下载识别inference模型并解压 # 下载识别inference模型并解压
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar && tar -xf product_ResNet50_vd_aliproduct_v1.0_infer.tar wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar && tar -xf product_ResNet50_vd_aliproduct_v1.0_infer.tar
cd .. # 下载demo数据并解压
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/recognition_demo_data_v1.0.tar && tar -xf recognition_demo_data_v1.0.tar
``` ```
解压完毕后,`dataset`文件夹下应有如下文件结构: 解压完毕后,`recognition_demo_data_v1.0`文件夹下应有如下文件结构:
``` ```
├── product_demo_data_v1.0 ├── recognition_demo_data_v1.0
│ ├── data_file.txt │ ├── gallery_cartoon
│ ├── gallery │ ├── gallery_logo
│ ├── index │ ├── gallery_product
│ └── query │ ├── gallery_vehicle
│ ├── test_cartoon
│ ├── test_logo
│ ├── test_product
│ └── test_vehicle
├── ... ├── ...
``` ```
其中`data_file.txt`是用于构建索引库的图像列表文件,`gallery`文件夹中是所有用于构建索引库的图像原始文件,`index`文件夹中是构建索引库生成的索引文件,`query`是用来测试识别效果的demo图像。 其中`gallery_xxx`文件夹中存放的是用于构建索引库的原始图像,`test_xxx`文件夹中存放的是用于测试识别效果的图像列表。
`models`文件夹下应有如下文件结构: `models`文件夹下应有如下文件结构:
...@@ -131,7 +131,7 @@ cd .. ...@@ -131,7 +131,7 @@ cd ..
<a name="识别单张图像"></a> <a name="识别单张图像"></a>
#### 2.2.1 识别单张图像 #### 2.2.1 识别单张图像
运行下面的命令,对图像`./dataset/product_demo_data_v1.0/query/wangzai.jpg`进行识别与检索 运行下面的命令,对图像`./recognition_demo_data_v1.0/test_product/daoxiangcunjinzhubing_6.jpg`进行识别与检索
```shell ```shell
# 使用下面的命令使用GPU进行预测 # 使用下面的命令使用GPU进行预测
...@@ -140,29 +140,30 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml ...@@ -140,29 +140,30 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml
python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.use_gpu=False python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.use_gpu=False
``` ```
注意:这里使用了默认编译生成的库文件进行特征索引,如果与您的环境不兼容,导致程序报错,可以参考[向量检索教程](../../../deploy/vector_search/README.md)重新编译库文件。
待检索图像如下所示。 待检索图像如下所示。
<div align="center"> <div align="center">
<img src="../../images/recognition/product_demo/wangzai.jpg" width = "400" /> <img src="../../images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg" width = "400" />
</div> </div>
最终输出结果如下。 最终输出结果如下。
``` ```
[{'bbox': [305, 226, 776, 930], 'rec_docs': ['旺仔牛奶', '旺仔牛奶', '旺仔牛奶', '旺仔牛奶', '康师傅方便面'], 'rec_scores': array([1328.1072998 , 1185.92248535, 846.88220215, 746.28546143 622.2668457 ])} [{'bbox': [287, 129, 497, 326], 'rec_docs': '稻香村金猪饼', 'rec_scores': 0.8309420943260193}, {'bbox': [99, 242, 313, 426], 'rec_docs': '稻香村金猪饼', 'rec_scores': 0.7245652079582214}]
``` ```
其中bbox表示检测出的主体所在位置,rec_docs表示索引库中与检出主体最相近的若干张图像对应的标签,rec_scores表示对应的相似度。由rec_docs字段可以看出,返回的5个结果中,有4个为`旺仔牛奶`,识别正确 其中bbox表示检测出的主体所在位置,rec_docs表示索引库中与检测框最为相似的类别,rec_scores表示对应的置信度
检测的可视化结果也保存在`output`文件夹下。 检测的可视化结果也保存在`output`文件夹下,对于本章图像,识别结果可视化如下所示
<div align="center"> <div align="center">
<img src="../../images/recognition/product_demo/wangzai_det_result.jpg" width = "400" /> <img src="../../images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg" width = "400" />
</div> </div>
<a name="基于文件夹的批量识别"></a> <a name="基于文件夹的批量识别"></a>
#### 2.2.2 基于文件夹的批量识别 #### 2.2.2 基于文件夹的批量识别
...@@ -170,34 +171,47 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.u ...@@ -170,34 +171,47 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.u
```shell ```shell
# 使用下面的命令使用GPU进行预测,如果希望使用CPU预测,可以在命令后面添加-o Global.use_gpu=False # 使用下面的命令使用GPU进行预测,如果希望使用CPU预测,可以在命令后面添加-o Global.use_gpu=False
python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/" python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/"
``` ```
终端中会输出该文件夹内所有图像的识别结果,如下所示。
```
...
[{'bbox': [37, 29, 123, 89], 'rec_docs': '香奈儿包', 'rec_scores': 0.6163763999938965}, {'bbox': [153, 96, 235, 175], 'rec_docs': '香奈儿包', 'rec_scores': 0.5279821157455444}]
[{'bbox': [735, 562, 1133, 851], 'rec_docs': '香奈儿包', 'rec_scores': 0.5588355660438538}]
[{'bbox': [124, 50, 230, 129], 'rec_docs': '香奈儿包', 'rec_scores': 0.6980369687080383}]
[{'bbox': [0, 0, 275, 183], 'rec_docs': '香奈儿包', 'rec_scores': 0.5818190574645996}]
[{'bbox': [400, 1179, 905, 1537], 'rec_docs': '香奈儿包', 'rec_scores': 0.9814301133155823}]
[{'bbox': [544, 4, 1482, 932], 'rec_docs': '香奈儿包', 'rec_scores': 0.5143815279006958}]
[{'bbox': [29, 42, 194, 183], 'rec_docs': '香奈儿包', 'rec_scores': 0.9543638229370117}]
...
```
所有图像的识别结果可视化图像也保存在`output`文件夹内。
更多地,可以通过修改`Global.rec_inference_model_dir`字段来更改识别inference模型的路径,通过修改`IndexProcess.index_path`字段来更改索引库索引的路径。 更多地,可以通过修改`Global.rec_inference_model_dir`字段来更改识别inference模型的路径,通过修改`IndexProcess.index_path`字段来更改索引库索引的路径。
<a name="未知类别的图像识别体验"></a> <a name="未知类别的图像识别体验"></a>
## 3. 未知类别的图像识别体验 ## 3. 未知类别的图像识别体验
对图像`./dataset/product_demo_data_v1.0/query/anmuxi.jpg`进行识别,命令如下 对图像`./recognition_demo_data_v1.0/test_product/anmuxi.jpg`进行识别,命令如下
```shell ```shell
# 使用下面的命令使用GPU进行预测,如果希望使用CPU预测,可以在命令后面添加-o Global.use_gpu=False # 使用下面的命令使用GPU进行预测,如果希望使用CPU预测,可以在命令后面添加-o Global.use_gpu=False
python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/anmuxi.jpg" python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/anmuxi.jpg"
``` ```
待检索图像如下所示。 待检索图像如下所示。
<div align="center"> <div align="center">
<img src="../../images/recognition/product_demo/anmuxi.jpg" width = "400" /> <img src="../../images/recognition/product_demo/query/anmuxi.jpg" width = "400" />
</div> </div>
输出结果如下 输出结果为空。
```
[{'bbox': [243, 80, 523, 522], 'rec_docs': ['娃哈哈AD钙奶', '旺仔牛奶', '娃哈哈AD钙奶', '农夫山泉矿泉水', '红牛'], 'rec_scores': array([548.33282471, 411.85687256, 408.39770508, 400.89404297, 360.41540527])}]
```
由于默认的索引库中不包含对应的索引信息,所以这里的识别结果有误,此时我们可以通过构建新的索引库的方式,完成未知类别的图像识别。 由于默认的索引库中不包含对应的索引信息,所以这里的识别结果有误,此时我们可以通过构建新的索引库的方式,完成未知类别的图像识别。
...@@ -206,31 +220,31 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.i ...@@ -206,31 +220,31 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.i
<a name="准备新的数据与标签"></a> <a name="准备新的数据与标签"></a>
### 3.1 准备新的数据与标签 ### 3.1 准备新的数据与标签
首先需要将与待检索图像相似的图像列表拷贝到索引库原始图像的文件夹(`./dataset/product_demo_data_v1.0.0/gallery`)中,运行下面的命令拷贝相似图像。 首先需要将与待检索图像相似的图像列表拷贝到索引库原始图像的文件夹(`./recognition_demo_data_v1.0/gallery_product/gallery`)中,运行下面的命令拷贝相似图像。
```shell ```shell
cp -r ../docs/images/recognition/product_demo/gallery/anmuxi ./dataset/product_demo_data_v1.0/gallery/ cp -r ../docs/images/recognition/product_demo/gallery/anmuxi ./recognition_demo_data_v1.0/gallery_product/gallery/
``` ```
然后需要编辑记录了图像路径和标签信息的文本文件(`./dataset/product_demo_data_v1.0/data_file.txt`),这里基于原始标签文件,新建一个文件。命令如下。 然后需要编辑记录了图像路径和标签信息的文本文件(`./recognition_demo_data_v1.0/gallery_product/data_file_update.txt`),这里基于原始标签文件,新建一个文件。命令如下。
```shell ```shell
# 复制文件 # 复制文件
cp dataset/product_demo_data_v1.0/data_file.txt dataset/product_demo_data_v1.0/data_file_update.txt cp recognition_demo_data_v1.0/gallery_product/data_file.txt recognition_demo_data_v1.0/gallery_product/data_file_update.txt
``` ```
然后在文件`dataset/product_demo_data_v1.0/data_file_update.txt`中添加以下的信息, 然后在文件`recognition_demo_data_v1.0/gallery_product/data_file_update.txt`中添加以下的信息,
``` ```
gallery/anmuxi/001.jpg 安慕希酸奶 gallery/anmuxi/001.jpg 安慕希酸奶
gallery/anmuxi/002.jpg 安慕希酸奶 gallery/anmuxi/002.jpg 安慕希酸奶
gallery/anmuxi/003.jpg 安慕希酸奶 gallery/anmuxi/003.jpg 安慕希酸奶
gallery/anmuxi/004.jpg 安慕希酸奶 gallery/anmuxi/004.jpg 安慕希酸奶
gallery/anmuxi/005.jpg 安慕希酸奶 gallery/anmuxi/005.jpg 安慕希酸奶
gallery/anmuxi/006.jpg 安慕希酸奶 gallery/anmuxi/006.jpg 安慕希酸奶
``` ```
每一行的文本中,第一个字段表示图像的相对路径,第二个字段表示图像对应的标签信息,中间用`空格符`分隔开 每一行的文本中,第一个字段表示图像的相对路径,第二个字段表示图像对应的标签信息,中间用`tab`键分隔开(注意:有些编辑器会将`tab`自动转换为`空格`,这种情况下会导致文件解析报错)
<a name="建立新的索引库"></a> <a name="建立新的索引库"></a>
...@@ -239,10 +253,10 @@ gallery/anmuxi/006.jpg 安慕希酸奶 ...@@ -239,10 +253,10 @@ gallery/anmuxi/006.jpg 安慕希酸奶
使用下面的命令构建index索引,加速识别后的检索过程。 使用下面的命令构建index索引,加速识别后的检索过程。
```shell ```shell
python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess.data_file="./dataset/product_demo_data_v1.0/data_file_update.txt" -o IndexProcess.index_path="./dataset/product_demo_data_v1.0/index_update" python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess.data_file="./recognition_demo_data_v1.0/gallery_product/data_file_update.txt" -o IndexProcess.index_path="./recognition_demo_data_v1.0/gallery_product/index_update"
``` ```
最终新的索引信息保存在文件夹`./dataset/product_demo_data_v1.0/index_update`中。 最终新的索引信息保存在文件夹`./recognition_demo_data_v1.0/gallery_product/index_update`中。
<a name="基于新的索引库的图像识别"></a> <a name="基于新的索引库的图像识别"></a>
...@@ -252,13 +266,17 @@ python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess. ...@@ -252,13 +266,17 @@ python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess.
```shell ```shell
# 使用下面的命令使用GPU进行预测,如果希望使用CPU预测,可以在命令后面添加-o Global.use_gpu=False # 使用下面的命令使用GPU进行预测,如果希望使用CPU预测,可以在命令后面添加-o Global.use_gpu=False
python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/anmuxi.jpg" -o IndexProcess.index_path="./dataset/product_demo_data_v1.0/index_update" python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/anmuxi.jpg" -o IndexProcess.index_path="./recognition_demo_data_v1.0/gallery_product/index_update"
``` ```
输出结果如下。 输出结果如下。
``` ```
[{'bbox': [243, 80, 523, 522], 'rec_docs': ['安慕希酸奶', '娃哈哈AD钙奶', '安慕希酸奶', '安慕希酸奶', '安慕希酸奶'], 'rec_scores': array([1214.9597168 , 548.33282471, 547.82104492, 535.13201904, 471.52706909])}] [{'bbox': [243, 80, 523, 522], 'rec_docs': '安慕希酸奶', 'rec_scores': 0.5570770502090454}]
``` ```
返回的5个结果中,有4个为`安慕希酸奶`,识别结果正确。 最终返回结果为`安慕希酸奶`,识别正确,识别结果可视化如下所示。
<div align="center">
<img src="../../images/recognition/product_demo/result/anmuxi.jpg" width = "400" />
</div>
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册