diff --git a/deploy/configs/build_cartoon.yaml b/deploy/configs/build_cartoon.yaml index 4c000de1f0c1b0fd41d50296f39b026745442788..3c93a6faa4f3c3f3e9e5a89eb7060ca265b29c9b 100644 --- a/deploy/configs/build_cartoon.yaml +++ b/deploy/configs/build_cartoon.yaml @@ -28,9 +28,9 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/cartoon_demo_data_v1.0/index/" - image_root: "./dataset/cartoon_demo_data_v1.0/" - data_file: "./dataset/cartoon_demo_data_v1.0/data_file.txt" + index_path: "./recognition_demo_data_v1.0/gallery_cartoon/index/" + image_root: "./recognition_demo_data_v1.0/gallery_cartoon/" + data_file: "./recognition_demo_data_v1.0/gallery_cartoon/data_file.txt" delimiter: "\t" dist_type: "IP" pq_size: 100 diff --git a/deploy/configs/build_logo.yaml b/deploy/configs/build_logo.yaml index 8bcb9be6f2808d381cd728910c78559a8a160dbe..1f3800e2750ccf3290732ac0c022379aedfb0c72 100644 --- a/deploy/configs/build_logo.yaml +++ b/deploy/configs/build_logo.yaml @@ -26,9 +26,9 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/logo_demo_data_v1.0/index/" - image_root: "./dataset/logo_demo_data_v1.0/" - data_file: "./dataset/logo_demo_data_v1.0/data_file.txt" + index_path: "./recognition_demo_data_v1.0/gallery_logo/index/" + image_root: "./recognition_demo_data_v1.0/gallery_logo/" + data_file: "./recognition_demo_data_v1.0/gallery_logo/data_file.txt" delimiter: "\t" dist_type: "IP" pq_size: 100 diff --git a/deploy/configs/build_product.yaml b/deploy/configs/build_product.yaml index ebe2d34c692b4a7de8c560792f8cbae9b52dffad..1ae4c0d53542d4289290b5cf050f99ffc5c6b80a 100644 --- a/deploy/configs/build_product.yaml +++ b/deploy/configs/build_product.yaml @@ -26,10 +26,10 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/product_demo_data_v1.0/index" - image_root: "./dataset/product_demo_data_v1.0" - data_file: "./dataset/product_demo_data_v1.0/data_file.txt" - delimiter: " " 
+ index_path: "./recognition_demo_data_v1.0/gallery_product/index" + image_root: "./recognition_demo_data_v1.0/gallery_product/" + data_file: "./recognition_demo_data_v1.0/gallery_product/data_file.txt" + delimiter: "\t" dist_type: "IP" pq_size: 100 embedding_size: 512 diff --git a/deploy/configs/build_vehicle.yaml b/deploy/configs/build_vehicle.yaml index 2e11a7df04fbd20c35cb9961f37f3a645bf6ea5b..4897f24a4ae7be533d28bc92224a230aac99679e 100644 --- a/deploy/configs/build_vehicle.yaml +++ b/deploy/configs/build_vehicle.yaml @@ -26,10 +26,10 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/vehicle_demo_data_v1.0/index/" - image_root: "./dataset/vehicle_demo_data_v1.0/" - data_file: "./dataset/vehicle_demo_data_v1.0/data_file.txt" - delimiter: " " + index_path: "./recognition_demo_data_v1.0/gallery_vehicle/index/" + image_root: "./recognition_demo_data_v1.0/gallery_vehicle/" + data_file: "./recognition_demo_data_v1.0/gallery_vehicle/data_file.txt" + delimiter: "\t" dist_type: "IP" pq_size: 100 embedding_size: 512 diff --git a/deploy/configs/inference_cartoon.yaml b/deploy/configs/inference_cartoon.yaml index 321ee00e837506ab0583414b416dd7a78ccbdf12..97d84667b2cac0bffc74541f3f1e2d53ccd6fda8 100644 --- a/deploy/configs/inference_cartoon.yaml +++ b/deploy/configs/inference_cartoon.yaml @@ -1,7 +1,9 @@ Global: - infer_imgs: "./dataset/cartoon_demo_data_v1.0/query/" + infer_imgs: "./recognition_demo_data_v1.0/test_cartoon" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" rec_inference_model_dir: "./models/cartoon_rec_ResNet50_iCartoon_v1.0_infer/" + rec_nms_thresold: 0.1 + batch_size: 1 image_shape: [3, 640, 640] threshold: 0.2 @@ -49,7 +51,7 @@ RecPreProcess: RecPostProcess: null IndexProcess: - index_path: "./dataset/cartoon_demo_data_v1.0/index/" + index_path: "./recognition_demo_data_v1.0/gallery_cartoon/index/" search_budget: 100 return_k: 5 dist_type: "IP" diff --git 
a/deploy/configs/inference_logo.yaml b/deploy/configs/inference_logo.yaml index 79eb69a04103e7fb471e6e3a134aefc3658c8961..6152e9cee5328c1516ccb796f2664bd1f665e929 100644 --- a/deploy/configs/inference_logo.yaml +++ b/deploy/configs/inference_logo.yaml @@ -1,7 +1,9 @@ Global: - infer_imgs: "./dataset/logo_demo_data_v1.0/query/logo_auxx-1.jpg" + infer_imgs: "./recognition_demo_data_v1.0/test_logo" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" rec_inference_model_dir: "./models/logo_rec_ResNet50_Logo3K_v1.0_infer/" + rec_nms_thresold: 0.3 + batch_size: 1 image_shape: [3, 640, 640] threshold: 0.2 @@ -48,7 +50,7 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/logo_demo_data_v1.0/index/" + index_path: "./recognition_demo_data_v1.0/gallery_logo/index/" search_budget: 100 return_k: 5 dist_type: "IP" diff --git a/deploy/configs/inference_product.yaml b/deploy/configs/inference_product.yaml index 27ab50ce10b8349f4f34883100821e188d89c18d..1b3291406dc83ecef6c92b4f6e1e83ef9065404c 100644 --- a/deploy/configs/inference_product.yaml +++ b/deploy/configs/inference_product.yaml @@ -1,7 +1,9 @@ Global: - infer_imgs: "./dataset/product_demo_data_v1.0/query/wangzai.jpg" + infer_imgs: "./recognition_demo_data_v1.0/test_product/daoxiangcunjinzhubing_6.jpg" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer" rec_inference_model_dir: "./models/product_ResNet50_vd_aliproduct_v1.0_infer" + rec_nms_thresold: 0.3 + batch_size: 1 image_shape: [3, 640, 640] threshold: 0.2 @@ -48,7 +50,7 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/product_demo_data_v1.0/index" + index_path: "./recognition_demo_data_v1.0/gallery_product/index" search_budget: 100 return_k: 5 dist_type: "IP" diff --git a/deploy/configs/inference_vehicle.yaml b/deploy/configs/inference_vehicle.yaml index 1c1104f6fb87aff86c1987a80fd1e174c48ac0a6..a3dec869c0f9d9a3dce034cd9d267ca5df5daa1f 
100644 --- a/deploy/configs/inference_vehicle.yaml +++ b/deploy/configs/inference_vehicle.yaml @@ -1,7 +1,9 @@ Global: - infer_imgs: "./dataset/vehicle_demo_data_v1.0/query/" + infer_imgs: "./recognition_demo_data_v1.0/test_vehicle/" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" rec_inference_model_dir: "./models/vehicle_cls_ResNet50_CompCars_v1.0_infer/" + rec_nms_thresold: 0.3 + batch_size: 1 image_shape: [3, 640, 640] threshold: 0.2 @@ -50,7 +52,7 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/vehicle_demo_data_v1.0/index/" + index_path: "./recognition_demo_data_v1.0/gallery_vehicle/index/" search_budget: 100 return_k: 5 dist_type: "IP" diff --git a/deploy/python/build_gallery.py b/deploy/python/build_gallery.py index 7ff82fb97f182646e8e26d3b24bc8403ea6bb476..2087d9e01b5ae778d9bb4ed46a1169dbca3d83c1 100644 --- a/deploy/python/build_gallery.py +++ b/deploy/python/build_gallery.py @@ -39,9 +39,12 @@ def split_datafile(data_file, image_root, delimiter="\t"): gallery_docs = [] with open(data_file, 'r', encoding='utf-8') as f: lines = f.readlines() - for i, line in enumerate(lines): - line = line.strip().split(delimiter) + for _, ori_line in enumerate(lines): + line = ori_line.strip().split(delimiter) + text_num = len(line) + assert text_num >= 2, f"line({ori_line}) must be splitted into at least 2 parts, but got {text_num}" image_file = os.path.join(image_root, line[0]) + image_doc = line[1] gallery_images.append(image_file) gallery_docs.append(image_doc) diff --git a/deploy/python/predict_system.py b/deploy/python/predict_system.py index 0f30cb45cdc333023c5a06552e5e54301d6e5489..1683733616f2a27010f797fef96e133505035c75 100644 --- a/deploy/python/predict_system.py +++ b/deploy/python/predict_system.py @@ -50,17 +50,51 @@ class SystemPredictor(object): results.append({ "class_id": 0, "score": 1.0, - "bbox": np.array([0, 0, shape[1], shape[0]]), + "bbox": + np.array([0, 0, shape[1], shape[0]]), # 
def nms_to_rec_results(self, results, thresh=0.3):
    """Greedy non-maximum suppression over recognition results.

    Results are visited from highest to lowest ``rec_scores``. Each visited
    result is kept, and every remaining result whose IoU with it is greater
    than ``thresh`` is discarded.

    Args:
        results: list of dicts. Each dict must provide ``"bbox"`` (indexable,
            read as ``[xmin, ymin, xmax, ymax]``) and ``"rec_scores"`` (a
            scalar used for ranking). Other keys are passed through untouched.
        thresh: IoU threshold; a candidate survives when its IoU with the
            currently kept box is ``<= thresh``.

    Returns:
        A new list containing the kept result dicts (the same objects as in
        ``results``), ordered by descending ``rec_scores``. An empty input
        yields an empty list.
    """
    filtered_results = []
    x1 = np.array([r["bbox"][0] for r in results]).astype("float32")
    y1 = np.array([r["bbox"][1] for r in results]).astype("float32")
    x2 = np.array([r["bbox"][2] for r in results]).astype("float32")
    y2 = np.array([r["bbox"][3] for r in results]).astype("float32")
    scores = np.array([r["rec_scores"] for r in results])

    # Inclusive pixel convention: a box from 0 to 10 is 11 pixels wide.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    while order.size > 0:
        i = order[0]
        rest = order[1:]

        # Intersection rectangle: the top-left corner is the element-wise MAX
        # of the two top-left corners, the bottom-right corner is the
        # element-wise MIN of the two bottom-right corners. (The y axis
        # previously used min/max swapped, which measured the vertical union
        # instead of the overlap and produced IoU values outside [0, 1].)
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # Clamp at zero so disjoint boxes contribute no intersection area.
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        # `inds` indexes into `rest`, hence the +1 to map back into `order`.
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

        filtered_results.append(results[i])

    return filtered_results
b/docs/images/recognition/product_demo/query/anmuxi.jpg similarity index 100% rename from docs/images/recognition/product_demo/anmuxi.jpg rename to docs/images/recognition/product_demo/query/anmuxi.jpg diff --git a/docs/images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg b/docs/images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fc64a9531db0829d42b51e888361fa697afd080f Binary files /dev/null and b/docs/images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg differ diff --git a/docs/images/recognition/product_demo/result/anmuxi.jpg b/docs/images/recognition/product_demo/result/anmuxi.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8749fe1cb39b1ce6b6771fffeaa8f383bc267920 Binary files /dev/null and b/docs/images/recognition/product_demo/result/anmuxi.jpg differ diff --git a/docs/images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg b/docs/images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..babc6cea1f1145889f7f128908235c49a92d54c4 Binary files /dev/null and b/docs/images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg differ diff --git a/docs/zh_CN/tutorials/quick_start_recognition.md b/docs/zh_CN/tutorials/quick_start_recognition.md index d2c3ed145f116e757ff3094dc914448df4d2f390..dc9fa2c0ccb823014f974d0a1cfdd25ea5367ac8 100644 --- a/docs/zh_CN/tutorials/quick_start_recognition.md +++ b/docs/zh_CN/tutorials/quick_start_recognition.md @@ -34,13 +34,16 @@ 检测模型与4个方向(Logo、动漫人物、车辆、商品)的识别inference模型、测试数据下载地址以及对应的配置文件地址如下。 -| 模型简介 | 推荐场景 | 测试数据地址 | inference模型 | 预测配置文件 | 构建索引库的配置文件 | -| ------------ | ------------- | ------- | -------- | ------- | -------- | -| 通用主体检测模型 | 通用场景 | - |[模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) | - | - | -| Logo识别模型 | 
Logo场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/logo_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) | [inference_logo.yaml](../../../deploy/configs/inference_logo.yaml) | [build_logo.yaml](../../../deploy/configs/build_logo.yaml) | -| 动漫人物识别模型 | 动漫人物场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/cartoon_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) | -| 车辆细分类模型 | 车辆场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/vehicle_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) | [inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) | -| 商品识别模型 | 商品场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar) | [inference_product.yaml](../../../deploy/configs/inference_product.yaml) | [build_product.yaml](../../../deploy/configs/build_product.yaml) | +| 模型简介 | 推荐场景 | inference模型 | 预测配置文件 | 构建索引库的配置文件 | +| ------------ | ------------- | -------- | ------- | -------- | +| 通用主体检测模型 | 通用场景 |[模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) | - | - | +| Logo识别模型 | Logo场景 | 
[模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) | [inference_logo.yaml](../../../deploy/configs/inference_logo.yaml) | [build_logo.yaml](../../../deploy/configs/build_logo.yaml) | +| 动漫人物识别模型 | 动漫人物场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) | +| 车辆细分类模型 | 车辆场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) | [inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) | +| 商品识别模型 | 商品场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar) | [inference_product.yaml](../../../deploy/configs/inference_product.yaml) | [build_product.yaml](../../../deploy/configs/build_product.yaml) | + + +本章节demo数据下载地址如下: [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/recognition_demo_data_v1.0.tar)。 **注意** @@ -60,32 +63,23 @@ brew install wget * 可以按照下面的命令下载并解压数据与模型 ```shell -mkdir dataset -cd dataset -# 下载demo数据并解压 -wget {数据下载链接地址} && tar -xf {压缩包的名称} -cd .. - mkdir models cd models # 下载识别inference模型并解压 wget {模型下载链接地址} && tar -xf {压缩包的名称} cd .. + +# 下载demo数据并解压 +wget {数据下载链接地址} && tar -xf {压缩包的名称} ``` ### 2.1 下载、解压inference 模型与demo数据 -以商品识别为例,下载通用检测、识别模型以及商品识别demo数据,命令如下。 +以商品识别为例,下载demo数据集以及通用检测、识别模型,命令如下。 ```shell -mkdir dataset -cd dataset -# 下载demo数据并解压 -wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar && tar -xf product_demo_data_v1.0.tar -cd .. 
- mkdir models cd models # 下载通用检测inference模型并解压 @@ -93,21 +87,28 @@ wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/infere # 下载识别inference模型并解压 wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar && tar -xf product_ResNet50_vd_aliproduct_v1.0_infer.tar cd .. +# 下载demo数据并解压 +wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/recognition_demo_data_v1.0.tar && tar -xf recognition_demo_data_v1.0.tar ``` -解压完毕后,`dataset`文件夹下应有如下文件结构: +解压完毕后,`recognition_demo_data_v1.0`文件夹下应有如下文件结构: ``` -├── product_demo_data_v1.0 -│ ├── data_file.txt -│ ├── gallery -│ ├── index -│ └── query +├── recognition_demo_data_v1.0 +│ ├── gallery_cartoon +│ ├── gallery_logo +│ ├── gallery_product +│ ├── gallery_vehicle +│ ├── test_cartoon +│ ├── test_logo +│ ├── test_product +│ └── test_vehicle ├── ... ``` -其中`data_file.txt`是用于构建索引库的图像列表文件,`gallery`文件夹中是所有用于构建索引库的图像原始文件,`index`文件夹中是构建索引库生成的索引文件,`query`是用来测试识别效果的demo图像。 +其中`gallery_xxx`文件夹中存放的是用于构建索引库的原始图像,`test_xxx`文件夹中存放的是用于测试识别效果的图像列表。 + `models`文件夹下应有如下文件结构: @@ -131,7 +131,7 @@ cd .. #### 2.2.1 识别单张图像 -运行下面的命令,对图像`./dataset/product_demo_data_v1.0/query/wangzai.jpg`进行识别与检索 +运行下面的命令,对图像`./recognition_demo_data_v1.0/test_product/daoxiangcunjinzhubing_6.jpg`进行识别与检索 ```shell # 使用下面的命令使用GPU进行预测 @@ -140,29 +140,30 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.use_gpu=False ``` +注意:这里使用了默认编译生成的库文件进行特征索引,如果与您的环境不兼容,导致程序报错,可以参考[向量检索教程](../../../deploy/vector_search/README.md)重新编译库文件。 + 待检索图像如下所示。
- +
最终输出结果如下。 ``` -[{'bbox': [305, 226, 776, 930], 'rec_docs': ['旺仔牛奶', '旺仔牛奶', '旺仔牛奶', '旺仔牛奶', '康师傅方便面'], 'rec_scores': array([1328.1072998 , 1185.92248535, 846.88220215, 746.28546143 622.2668457 ])} +[{'bbox': [287, 129, 497, 326], 'rec_docs': '稻香村金猪饼', 'rec_scores': 0.8309420943260193}, {'bbox': [99, 242, 313, 426], 'rec_docs': '稻香村金猪饼', 'rec_scores': 0.7245652079582214}] ``` -其中bbox表示检测出的主体所在位置,rec_docs表示索引库中与检出主体最相近的若干张图像对应的标签,rec_scores表示对应的相似度。由rec_docs字段可以看出,返回的5个结果中,有4个为`旺仔牛奶`,识别正确。 +其中bbox表示检测出的主体所在位置,rec_docs表示索引库中与检测框最为相似的类别,rec_scores表示对应的置信度。 -检测的可视化结果也保存在`output`文件夹下。 +检测的可视化结果也保存在`output`文件夹下,对于本章图像,识别结果可视化如下所示。
- +
- #### 2.2.2 基于文件夹的批量识别 @@ -170,34 +171,47 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.u ```shell # 使用下面的命令使用GPU进行预测,如果希望使用CPU预测,可以在命令后面添加-o Global.use_gpu=False -python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/" +python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/" ``` +终端中会输出该文件夹内所有图像的识别结果,如下所示。 + +``` +... +[{'bbox': [37, 29, 123, 89], 'rec_docs': '香奈儿包', 'rec_scores': 0.6163763999938965}, {'bbox': [153, 96, 235, 175], 'rec_docs': '香奈儿包', 'rec_scores': 0.5279821157455444}] +[{'bbox': [735, 562, 1133, 851], 'rec_docs': '香奈儿包', 'rec_scores': 0.5588355660438538}] +[{'bbox': [124, 50, 230, 129], 'rec_docs': '香奈儿包', 'rec_scores': 0.6980369687080383}] +[{'bbox': [0, 0, 275, 183], 'rec_docs': '香奈儿包', 'rec_scores': 0.5818190574645996}] +[{'bbox': [400, 1179, 905, 1537], 'rec_docs': '香奈儿包', 'rec_scores': 0.9814301133155823}] +[{'bbox': [544, 4, 1482, 932], 'rec_docs': '香奈儿包', 'rec_scores': 0.5143815279006958}] +[{'bbox': [29, 42, 194, 183], 'rec_docs': '香奈儿包', 'rec_scores': 0.9543638229370117}] +... +``` + +所有图像的识别结果可视化图像也保存在`output`文件夹内。 + + 更多地,可以通过修改`Global.rec_inference_model_dir`字段来更改识别inference模型的路径,通过修改`IndexProcess.index_path`字段来更改索引库索引的路径。 ## 3. 未知类别的图像识别体验 -对图像`./dataset/product_demo_data_v1.0/query/anmuxi.jpg`进行识别,命令如下 +对图像`./recognition_demo_data_v1.0/test_product/anmuxi.jpg`进行识别,命令如下 ```shell # 使用下面的命令使用GPU进行预测,如果希望使用CPU预测,可以在命令后面添加-o Global.use_gpu=False -python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/anmuxi.jpg" +python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/anmuxi.jpg" ``` 待检索图像如下所示。
- +
-输出结果如下 - -``` -[{'bbox': [243, 80, 523, 522], 'rec_docs': ['娃哈哈AD钙奶', '旺仔牛奶', '娃哈哈AD钙奶', '农夫山泉矿泉水', '红牛'], 'rec_scores': array([548.33282471, 411.85687256, 408.39770508, 400.89404297, 360.41540527])}] -``` +输出结果为空。 由于默认的索引库中不包含对应的索引信息,所以这里的识别结果有误,此时我们可以通过构建新的索引库的方式,完成未知类别的图像识别。 @@ -206,31 +220,31 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.i ### 3.1 准备新的数据与标签 -首先需要将与待检索图像相似的图像列表拷贝到索引库原始图像的文件夹(`./dataset/product_demo_data_v1.0.0/gallery`)中,运行下面的命令拷贝相似图像。 +首先需要将与待检索图像相似的图像列表拷贝到索引库原始图像的文件夹(`./recognition_demo_data_v1.0/gallery_product/gallery`)中,运行下面的命令拷贝相似图像。 ```shell -cp -r ../docs/images/recognition/product_demo/gallery/anmuxi ./dataset/product_demo_data_v1.0/gallery/ +cp -r ../docs/images/recognition/product_demo/gallery/anmuxi ./recognition_demo_data_v1.0/gallery_product/gallery/ ``` -然后需要编辑记录了图像路径和标签信息的文本文件(`./dataset/product_demo_data_v1.0/data_file.txt`),这里基于原始标签文件,新建一个文件。命令如下。 +然后需要编辑记录了图像路径和标签信息的文本文件(`./recognition_demo_data_v1.0/gallery_product/data_file_update.txt`),这里基于原始标签文件,新建一个文件。命令如下。 ```shell # 复制文件 -cp dataset/product_demo_data_v1.0/data_file.txt dataset/product_demo_data_v1.0/data_file_update.txt +cp recognition_demo_data_v1.0/gallery_product/data_file.txt recognition_demo_data_v1.0/gallery_product/data_file_update.txt ``` -然后在文件`dataset/product_demo_data_v1.0/data_file_update.txt`中添加以下的信息, +然后在文件`recognition_demo_data_v1.0/gallery_product/data_file_update.txt`中添加以下的信息, ``` -gallery/anmuxi/001.jpg 安慕希酸奶 -gallery/anmuxi/002.jpg 安慕希酸奶 -gallery/anmuxi/003.jpg 安慕希酸奶 -gallery/anmuxi/004.jpg 安慕希酸奶 -gallery/anmuxi/005.jpg 安慕希酸奶 -gallery/anmuxi/006.jpg 安慕希酸奶 +gallery/anmuxi/001.jpg 安慕希酸奶 +gallery/anmuxi/002.jpg 安慕希酸奶 +gallery/anmuxi/003.jpg 安慕希酸奶 +gallery/anmuxi/004.jpg 安慕希酸奶 +gallery/anmuxi/005.jpg 安慕希酸奶 +gallery/anmuxi/006.jpg 安慕希酸奶 ``` -每一行的文本中,第一个字段表示图像的相对路径,第二个字段表示图像对应的标签信息,中间用`空格符`分隔开。 +每一行的文本中,第一个字段表示图像的相对路径,第二个字段表示图像对应的标签信息,中间用`tab`键分隔开(注意:有些编辑器会将`tab`自动转换为`空格`,这种情况下会导致文件解析报错)。 @@ -239,10 +253,10 @@ 
gallery/anmuxi/006.jpg 安慕希酸奶 使用下面的命令构建index索引,加速识别后的检索过程。 ```shell -python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess.data_file="./dataset/product_demo_data_v1.0/data_file_update.txt" -o IndexProcess.index_path="./dataset/product_demo_data_v1.0/index_update" +python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess.data_file="./recognition_demo_data_v1.0/gallery_product/data_file_update.txt" -o IndexProcess.index_path="./recognition_demo_data_v1.0/gallery_product/index_update" ``` -最终新的索引信息保存在文件夹`./dataset/product_demo_data_v1.0/index_update`中。 +最终新的索引信息保存在文件夹`./recognition_demo_data_v1.0/gallery_product/index_update`中。 @@ -252,13 +266,17 @@ python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess. ```shell # 使用下面的命令使用GPU进行预测,如果希望使用CPU预测,可以在命令后面添加-o Global.use_gpu=False -python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/anmuxi.jpg" -o IndexProcess.index_path="./dataset/product_demo_data_v1.0/index_update" +python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/anmuxi.jpg" -o IndexProcess.index_path="./recognition_demo_data_v1.0/gallery_product/index_update" ``` 输出结果如下。 ``` -[{'bbox': [243, 80, 523, 522], 'rec_docs': ['安慕希酸奶', '娃哈哈AD钙奶', '安慕希酸奶', '安慕希酸奶', '安慕希酸奶'], 'rec_scores': array([1214.9597168 , 548.33282471, 547.82104492, 535.13201904, 471.52706909])}] +[{'bbox': [243, 80, 523, 522], 'rec_docs': '安慕希酸奶', 'rec_scores': 0.5570770502090454}] ``` -返回的5个结果中,有4个为`安慕希酸奶`,识别结果正确。 +最终返回结果为`安慕希酸奶`,识别正确,识别结果可视化如下所示。 + +
+ +