diff --git a/deploy/configs/build_cartoon.yaml b/deploy/configs/build_cartoon.yaml index 4c000de1f0c1b0fd41d50296f39b026745442788..3c93a6faa4f3c3f3e9e5a89eb7060ca265b29c9b 100644 --- a/deploy/configs/build_cartoon.yaml +++ b/deploy/configs/build_cartoon.yaml @@ -28,9 +28,9 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/cartoon_demo_data_v1.0/index/" - image_root: "./dataset/cartoon_demo_data_v1.0/" - data_file: "./dataset/cartoon_demo_data_v1.0/data_file.txt" + index_path: "./recognition_demo_data_v1.0/gallery_cartoon/index/" + image_root: "./recognition_demo_data_v1.0/gallery_cartoon/" + data_file: "./recognition_demo_data_v1.0/gallery_cartoon/data_file.txt" delimiter: "\t" dist_type: "IP" pq_size: 100 diff --git a/deploy/configs/build_logo.yaml b/deploy/configs/build_logo.yaml index 8bcb9be6f2808d381cd728910c78559a8a160dbe..1f3800e2750ccf3290732ac0c022379aedfb0c72 100644 --- a/deploy/configs/build_logo.yaml +++ b/deploy/configs/build_logo.yaml @@ -26,9 +26,9 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/logo_demo_data_v1.0/index/" - image_root: "./dataset/logo_demo_data_v1.0/" - data_file: "./dataset/logo_demo_data_v1.0/data_file.txt" + index_path: "./recognition_demo_data_v1.0/gallery_logo/index/" + image_root: "./recognition_demo_data_v1.0/gallery_logo/" + data_file: "./recognition_demo_data_v1.0/gallery_logo/data_file.txt" delimiter: "\t" dist_type: "IP" pq_size: 100 diff --git a/deploy/configs/build_product.yaml b/deploy/configs/build_product.yaml index ebe2d34c692b4a7de8c560792f8cbae9b52dffad..1ae4c0d53542d4289290b5cf050f99ffc5c6b80a 100644 --- a/deploy/configs/build_product.yaml +++ b/deploy/configs/build_product.yaml @@ -26,10 +26,10 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/product_demo_data_v1.0/index" - image_root: "./dataset/product_demo_data_v1.0" - data_file: "./dataset/product_demo_data_v1.0/data_file.txt" - delimiter: " " 
+ index_path: "./recognition_demo_data_v1.0/gallery_product/index" + image_root: "./recognition_demo_data_v1.0/gallery_product/" + data_file: "./recognition_demo_data_v1.0/gallery_product/data_file.txt" + delimiter: "\t" dist_type: "IP" pq_size: 100 embedding_size: 512 diff --git a/deploy/configs/build_vehicle.yaml b/deploy/configs/build_vehicle.yaml index 2e11a7df04fbd20c35cb9961f37f3a645bf6ea5b..4897f24a4ae7be533d28bc92224a230aac99679e 100644 --- a/deploy/configs/build_vehicle.yaml +++ b/deploy/configs/build_vehicle.yaml @@ -26,10 +26,10 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/vehicle_demo_data_v1.0/index/" - image_root: "./dataset/vehicle_demo_data_v1.0/" - data_file: "./dataset/vehicle_demo_data_v1.0/data_file.txt" - delimiter: " " + index_path: "./recognition_demo_data_v1.0/gallery_vehicle/index/" + image_root: "./recognition_demo_data_v1.0/gallery_vehicle/" + data_file: "./recognition_demo_data_v1.0/gallery_vehicle/data_file.txt" + delimiter: "\t" dist_type: "IP" pq_size: 100 embedding_size: 512 diff --git a/deploy/configs/inference_cartoon.yaml b/deploy/configs/inference_cartoon.yaml index 321ee00e837506ab0583414b416dd7a78ccbdf12..97d84667b2cac0bffc74541f3f1e2d53ccd6fda8 100644 --- a/deploy/configs/inference_cartoon.yaml +++ b/deploy/configs/inference_cartoon.yaml @@ -1,7 +1,9 @@ Global: - infer_imgs: "./dataset/cartoon_demo_data_v1.0/query/" + infer_imgs: "./recognition_demo_data_v1.0/test_cartoon" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" rec_inference_model_dir: "./models/cartoon_rec_ResNet50_iCartoon_v1.0_infer/" + rec_nms_thresold: 0.1 + batch_size: 1 image_shape: [3, 640, 640] threshold: 0.2 @@ -49,7 +51,7 @@ RecPreProcess: RecPostProcess: null IndexProcess: - index_path: "./dataset/cartoon_demo_data_v1.0/index/" + index_path: "./recognition_demo_data_v1.0/gallery_cartoon/index/" search_budget: 100 return_k: 5 dist_type: "IP" diff --git 
a/deploy/configs/inference_logo.yaml b/deploy/configs/inference_logo.yaml index 79eb69a04103e7fb471e6e3a134aefc3658c8961..6152e9cee5328c1516ccb796f2664bd1f665e929 100644 --- a/deploy/configs/inference_logo.yaml +++ b/deploy/configs/inference_logo.yaml @@ -1,7 +1,9 @@ Global: - infer_imgs: "./dataset/logo_demo_data_v1.0/query/logo_auxx-1.jpg" + infer_imgs: "./recognition_demo_data_v1.0/test_logo" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" rec_inference_model_dir: "./models/logo_rec_ResNet50_Logo3K_v1.0_infer/" + rec_nms_thresold: 0.3 + batch_size: 1 image_shape: [3, 640, 640] threshold: 0.2 @@ -48,7 +50,7 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/logo_demo_data_v1.0/index/" + index_path: "./recognition_demo_data_v1.0/gallery_logo/index/" search_budget: 100 return_k: 5 dist_type: "IP" diff --git a/deploy/configs/inference_product.yaml b/deploy/configs/inference_product.yaml index 27ab50ce10b8349f4f34883100821e188d89c18d..1b3291406dc83ecef6c92b4f6e1e83ef9065404c 100644 --- a/deploy/configs/inference_product.yaml +++ b/deploy/configs/inference_product.yaml @@ -1,7 +1,9 @@ Global: - infer_imgs: "./dataset/product_demo_data_v1.0/query/wangzai.jpg" + infer_imgs: "./recognition_demo_data_v1.0/test_product/daoxiangcunjinzhubing_6.jpg" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer" rec_inference_model_dir: "./models/product_ResNet50_vd_aliproduct_v1.0_infer" + rec_nms_thresold: 0.3 + batch_size: 1 image_shape: [3, 640, 640] threshold: 0.2 @@ -48,7 +50,7 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/product_demo_data_v1.0/index" + index_path: "./recognition_demo_data_v1.0/gallery_product/index" search_budget: 100 return_k: 5 dist_type: "IP" diff --git a/deploy/configs/inference_vehicle.yaml b/deploy/configs/inference_vehicle.yaml index 1c1104f6fb87aff86c1987a80fd1e174c48ac0a6..a3dec869c0f9d9a3dce034cd9d267ca5df5daa1f 
100644 --- a/deploy/configs/inference_vehicle.yaml +++ b/deploy/configs/inference_vehicle.yaml @@ -1,7 +1,9 @@ Global: - infer_imgs: "./dataset/vehicle_demo_data_v1.0/query/" + infer_imgs: "./recognition_demo_data_v1.0/test_vehicle/" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" rec_inference_model_dir: "./models/vehicle_cls_ResNet50_CompCars_v1.0_infer/" + rec_nms_thresold: 0.3 + batch_size: 1 image_shape: [3, 640, 640] threshold: 0.2 @@ -50,7 +52,7 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/vehicle_demo_data_v1.0/index/" + index_path: "./recognition_demo_data_v1.0/gallery_vehicle/index/" search_budget: 100 return_k: 5 dist_type: "IP" diff --git a/deploy/python/build_gallery.py b/deploy/python/build_gallery.py index 7ff82fb97f182646e8e26d3b24bc8403ea6bb476..2087d9e01b5ae778d9bb4ed46a1169dbca3d83c1 100644 --- a/deploy/python/build_gallery.py +++ b/deploy/python/build_gallery.py @@ -39,9 +39,12 @@ def split_datafile(data_file, image_root, delimiter="\t"): gallery_docs = [] with open(data_file, 'r', encoding='utf-8') as f: lines = f.readlines() - for i, line in enumerate(lines): - line = line.strip().split(delimiter) + for _, ori_line in enumerate(lines): + line = ori_line.strip().split(delimiter) + text_num = len(line) + assert text_num >= 2, f"line({ori_line}) must be splitted into at least 2 parts, but got {text_num}" image_file = os.path.join(image_root, line[0]) + image_doc = line[1] gallery_images.append(image_file) gallery_docs.append(image_doc) diff --git a/deploy/python/predict_system.py b/deploy/python/predict_system.py index 0f30cb45cdc333023c5a06552e5e54301d6e5489..1683733616f2a27010f797fef96e133505035c75 100644 --- a/deploy/python/predict_system.py +++ b/deploy/python/predict_system.py @@ -50,17 +50,51 @@ class SystemPredictor(object): results.append({ "class_id": 0, "score": 1.0, - "bbox": np.array([0, 0, shape[1], shape[0]]), + "bbox": + np.array([0, 0, shape[1], shape[0]]), # 
def nms_to_rec_results(self, results, thresh=0.3):
    """Greedy non-maximum suppression over recognition results.

    Results are visited in descending ``rec_scores`` order. Each result that
    is kept suppresses every remaining result whose intersection-over-union
    (IoU) with it is strictly greater than ``thresh``.

    Args:
        results: list of dicts. Each dict must provide ``"bbox"``, indexable
            as ``[xmin, ymin, xmax, ymax]``, and a scalar ``"rec_scores"``.
        thresh: IoU threshold; a remaining box is dropped when its IoU with
            the currently kept box exceeds this value.

    Returns:
        list: the kept dicts (the same objects that were passed in), ordered
        from highest to lowest ``rec_scores``. Empty when ``results`` is empty.
    """
    # Nothing to suppress; also avoids building empty coordinate arrays.
    if len(results) == 0:
        return []

    filtered_results = []
    x1 = np.array([r["bbox"][0] for r in results]).astype("float32")
    y1 = np.array([r["bbox"][1] for r in results]).astype("float32")
    x2 = np.array([r["bbox"][2] for r in results]).astype("float32")
    y2 = np.array([r["bbox"][3] for r in results]).astype("float32")
    scores = np.array([r["rec_scores"] for r in results])

    # Box areas, using the same "+1" extent convention as the intersection
    # computation below so that IoU of a box with itself is exactly 1.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Indices sorted by score, best first.
    order = scores.argsort()[::-1]

    while order.size > 0:
        i = order[0]
        rest = order[1:]

        # Intersection rectangle: the larger of the two top-left corners and
        # the smaller of the two bottom-right corners, on BOTH axes.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # Clamp to zero so non-overlapping boxes contribute no intersection.
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        # Keep only the candidates that do not overlap the current box too
        # much; "+ 1" maps positions in `rest` back to positions in `order`.
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

        filtered_results.append(results[i])

    return filtered_results
b/docs/images/recognition/product_demo/query/anmuxi.jpg similarity index 100% rename from docs/images/recognition/product_demo/anmuxi.jpg rename to docs/images/recognition/product_demo/query/anmuxi.jpg diff --git a/docs/images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg b/docs/images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fc64a9531db0829d42b51e888361fa697afd080f Binary files /dev/null and b/docs/images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg differ diff --git a/docs/images/recognition/product_demo/result/anmuxi.jpg b/docs/images/recognition/product_demo/result/anmuxi.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8749fe1cb39b1ce6b6771fffeaa8f383bc267920 Binary files /dev/null and b/docs/images/recognition/product_demo/result/anmuxi.jpg differ diff --git a/docs/images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg b/docs/images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..babc6cea1f1145889f7f128908235c49a92d54c4 Binary files /dev/null and b/docs/images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg differ diff --git a/docs/zh_CN/tutorials/quick_start_recognition.md b/docs/zh_CN/tutorials/quick_start_recognition.md index d2c3ed145f116e757ff3094dc914448df4d2f390..dc9fa2c0ccb823014f974d0a1cfdd25ea5367ac8 100644 --- a/docs/zh_CN/tutorials/quick_start_recognition.md +++ b/docs/zh_CN/tutorials/quick_start_recognition.md @@ -34,13 +34,16 @@ 检测模型与4个方向(Logo、动漫人物、车辆、商品)的识别inference模型、测试数据下载地址以及对应的配置文件地址如下。 -| 模型简介 | 推荐场景 | 测试数据地址 | inference模型 | 预测配置文件 | 构建索引库的配置文件 | -| ------------ | ------------- | ------- | -------- | ------- | -------- | -| 通用主体检测模型 | 通用场景 | - |[模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) | - | - | -| Logo识别模型 | 
Logo场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/logo_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) | [inference_logo.yaml](../../../deploy/configs/inference_logo.yaml) | [build_logo.yaml](../../../deploy/configs/build_logo.yaml) | -| 动漫人物识别模型 | 动漫人物场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/cartoon_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) | -| 车辆细分类模型 | 车辆场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/vehicle_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) | [inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) | -| 商品识别模型 | 商品场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar) | [inference_product.yaml](../../../deploy/configs/inference_product.yaml) | [build_product.yaml](../../../deploy/configs/build_product.yaml) | +| 模型简介 | 推荐场景 | inference模型 | 预测配置文件 | 构建索引库的配置文件 | +| ------------ | ------------- | -------- | ------- | -------- | +| 通用主体检测模型 | 通用场景 |[模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) | - | - | +| Logo识别模型 | Logo场景 | 
[模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) | [inference_logo.yaml](../../../deploy/configs/inference_logo.yaml) | [build_logo.yaml](../../../deploy/configs/build_logo.yaml) | +| 动漫人物识别模型 | 动漫人物场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) | +| 车辆细分类模型 | 车辆场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) | [inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) | +| 商品识别模型 | 商品场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar) | [inference_product.yaml](../../../deploy/configs/inference_product.yaml) | [build_product.yaml](../../../deploy/configs/build_product.yaml) | + + +本章节demo数据下载地址如下: [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/recognition_demo_data_v1.0.tar)。 **注意** @@ -60,32 +63,23 @@ brew install wget * 可以按照下面的命令下载并解压数据与模型 ```shell -mkdir dataset -cd dataset -# 下载demo数据并解压 -wget {数据下载链接地址} && tar -xf {压缩包的名称} -cd .. - mkdir models cd models # 下载识别inference模型并解压 wget {模型下载链接地址} && tar -xf {压缩包的名称} cd .. + +# 下载demo数据并解压 +wget {数据下载链接地址} && tar -xf {压缩包的名称} ``` ### 2.1 下载、解压inference 模型与demo数据 -以商品识别为例,下载通用检测、识别模型以及商品识别demo数据,命令如下。 +以商品识别为例,下载demo数据集以及通用检测、识别模型,命令如下。 ```shell -mkdir dataset -cd dataset -# 下载demo数据并解压 -wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar && tar -xf product_demo_data_v1.0.tar -cd .. 
- mkdir models cd models # 下载通用检测inference模型并解压 @@ -93,21 +87,27 @@ wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/infere # 下载识别inference模型并解压 wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar && tar -xf product_ResNet50_vd_aliproduct_v1.0_infer.tar -cd .. +# 下载demo数据并解压 +wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/recognition_demo_data_v1.0.tar && tar -xf recognition_demo_data_v1.0.tar ``` -解压完毕后,`dataset`文件夹下应有如下文件结构: +解压完毕后,`recognition_demo_data_v1.0`文件夹下应有如下文件结构: ``` -├── product_demo_data_v1.0 -│ ├── data_file.txt -│ ├── gallery -│ ├── index -│ └── query +├── recognition_demo_data_v1.0 +│ ├── gallery_cartoon +│ ├── gallery_logo +│ ├── gallery_product +│ ├── gallery_vehicle +│ ├── test_cartoon +│ ├── test_logo +│ ├── test_product +│ └── test_vehicle ├── ... ``` -其中`data_file.txt`是用于构建索引库的图像列表文件,`gallery`文件夹中是所有用于构建索引库的图像原始文件,`index`文件夹中是构建索引库生成的索引文件,`query`是用来测试识别效果的demo图像。 +其中`gallery_xxx`文件夹中存放的是用于构建索引库的原始图像,`test_xxx`文件夹中存放的是用于测试识别效果的图像列表。 + `models`文件夹下应有如下文件结构: @@ -131,7 +131,7 @@ cd .. #### 2.2.1 识别单张图像 -运行下面的命令,对图像`./dataset/product_demo_data_v1.0/query/wangzai.jpg`进行识别与检索 +运行下面的命令,对图像`./recognition_demo_data_v1.0/test_product/daoxiangcunjinzhubing_6.jpg`进行识别与检索 ```shell # 使用下面的命令使用GPU进行预测 @@ -140,29 +140,30 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.use_gpu=False ``` +注意:这里使用了默认编译生成的库文件进行特征索引,如果与您的环境不兼容,导致程序报错,可以参考[向量检索教程](../../../deploy/vector_search/README.md)重新编译库文件。 + 待检索图像如下所示。