diff --git a/deploy/configs/build_cartoon.yaml b/deploy/configs/build_cartoon.yaml index 4c000de1f0c1b0fd41d50296f39b026745442788..3c93a6faa4f3c3f3e9e5a89eb7060ca265b29c9b 100644 --- a/deploy/configs/build_cartoon.yaml +++ b/deploy/configs/build_cartoon.yaml @@ -28,9 +28,9 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/cartoon_demo_data_v1.0/index/" - image_root: "./dataset/cartoon_demo_data_v1.0/" - data_file: "./dataset/cartoon_demo_data_v1.0/data_file.txt" + index_path: "./recognition_demo_data_v1.0/gallery_cartoon/index/" + image_root: "./recognition_demo_data_v1.0/gallery_cartoon/" + data_file: "./recognition_demo_data_v1.0/gallery_cartoon/data_file.txt" delimiter: "\t" dist_type: "IP" pq_size: 100 diff --git a/deploy/configs/build_logo.yaml b/deploy/configs/build_logo.yaml index 8bcb9be6f2808d381cd728910c78559a8a160dbe..1f3800e2750ccf3290732ac0c022379aedfb0c72 100644 --- a/deploy/configs/build_logo.yaml +++ b/deploy/configs/build_logo.yaml @@ -26,9 +26,9 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/logo_demo_data_v1.0/index/" - image_root: "./dataset/logo_demo_data_v1.0/" - data_file: "./dataset/logo_demo_data_v1.0/data_file.txt" + index_path: "./recognition_demo_data_v1.0/gallery_logo/index/" + image_root: "./recognition_demo_data_v1.0/gallery_logo/" + data_file: "./recognition_demo_data_v1.0/gallery_logo/data_file.txt" delimiter: "\t" dist_type: "IP" pq_size: 100 diff --git a/deploy/configs/build_product.yaml b/deploy/configs/build_product.yaml index ebe2d34c692b4a7de8c560792f8cbae9b52dffad..1ae4c0d53542d4289290b5cf050f99ffc5c6b80a 100644 --- a/deploy/configs/build_product.yaml +++ b/deploy/configs/build_product.yaml @@ -26,10 +26,10 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/product_demo_data_v1.0/index" - image_root: "./dataset/product_demo_data_v1.0" - data_file: "./dataset/product_demo_data_v1.0/data_file.txt" - delimiter: " " + index_path: "./recognition_demo_data_v1.0/gallery_product/index" + image_root: "./recognition_demo_data_v1.0/gallery_product/" + data_file: "./recognition_demo_data_v1.0/gallery_product/data_file.txt" + delimiter: "\t" dist_type: "IP" pq_size: 100 embedding_size: 512 diff --git a/deploy/configs/build_vehicle.yaml b/deploy/configs/build_vehicle.yaml index 2e11a7df04fbd20c35cb9961f37f3a645bf6ea5b..4897f24a4ae7be533d28bc92224a230aac99679e 100644 --- a/deploy/configs/build_vehicle.yaml +++ b/deploy/configs/build_vehicle.yaml @@ -26,10 +26,10 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/vehicle_demo_data_v1.0/index/" - image_root: "./dataset/vehicle_demo_data_v1.0/" - data_file: "./dataset/vehicle_demo_data_v1.0/data_file.txt" - delimiter: " " + index_path: "./recognition_demo_data_v1.0/gallery_vehicle/index/" + image_root: "./recognition_demo_data_v1.0/gallery_vehicle/" + data_file: "./recognition_demo_data_v1.0/gallery_vehicle/data_file.txt" + delimiter: "\t" dist_type: "IP" pq_size: 100 embedding_size: 512 diff --git a/deploy/configs/inference_cartoon.yaml b/deploy/configs/inference_cartoon.yaml index eb7c869baa3046173aa932607ef9790fe69b64b6..bb08935a168db312c954dab5c84ea4c2347847a4 100644 --- a/deploy/configs/inference_cartoon.yaml +++ b/deploy/configs/inference_cartoon.yaml @@ -1,11 +1,13 @@ Global: - infer_imgs: "./dataset/cartoon_demo_data_v1.0/query/" + infer_imgs: "./recognition_demo_data_v1.0/test_cartoon" det_inference_model_dir: 
"./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" rec_inference_model_dir: "./models/cartoon_rec_ResNet50_iCartoon_v1.0_infer/" + rec_nms_thresold: 0.05 + batch_size: 1 image_shape: [3, 640, 640] - threshold: 0.5 - max_det_results: 1 + threshold: 0.2 + max_det_results: 5 labe_list: - foreground @@ -49,7 +51,8 @@ RecPreProcess: RecPostProcess: null IndexProcess: - index_path: "./dataset/cartoon_demo_data_v1.0/index/" + index_path: "./recognition_demo_data_v1.0/gallery_cartoon/index/" search_budget: 100 return_k: 5 dist_type: "IP" + score_thres: 0.5 diff --git a/deploy/configs/inference_logo.yaml b/deploy/configs/inference_logo.yaml index ea7fede4ff95be9642286ab90ef966487927efda..a98b6c3870083065c8a426a5477f55321d4aa2f0 100644 --- a/deploy/configs/inference_logo.yaml +++ b/deploy/configs/inference_logo.yaml @@ -1,11 +1,13 @@ Global: - infer_imgs: "./dataset/logo_demo_data_v1.0/query/logo_auxx-1.jpg" + infer_imgs: "./recognition_demo_data_v1.0/test_logo" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" rec_inference_model_dir: "./models/logo_rec_ResNet50_Logo3K_v1.0_infer/" + rec_nms_thresold: 0.05 + batch_size: 1 image_shape: [3, 640, 640] - threshold: 0.5 - max_det_results: 1 + threshold: 0.2 + max_det_results: 5 labe_list: - foreground @@ -48,7 +50,8 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/logo_demo_data_v1.0/index/" + index_path: "./recognition_demo_data_v1.0/gallery_logo/index/" search_budget: 100 return_k: 5 dist_type: "IP" + score_thres: 0.5 diff --git a/deploy/configs/inference_product.yaml b/deploy/configs/inference_product.yaml index f85933e8fbb08419399fcea5acdc576d3d7809e8..f75fee3e151028a4345b6ce728121deffd0a6ab4 100644 --- a/deploy/configs/inference_product.yaml +++ b/deploy/configs/inference_product.yaml @@ -1,11 +1,13 @@ Global: - infer_imgs: "./dataset/product_demo_data_v1.0/query/wangzai.jpg" + infer_imgs: "./recognition_demo_data_v1.0/test_product/daoxiangcunjinzhubing_6.jpg" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer" rec_inference_model_dir: "./models/product_ResNet50_vd_aliproduct_v1.0_infer" + rec_nms_thresold: 0.05 + batch_size: 1 image_shape: [3, 640, 640] threshold: 0.2 - max_det_results: 1 + max_det_results: 5 labe_list: - foreground @@ -48,7 +50,8 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/product_demo_data_v1.0/index" + index_path: "./recognition_demo_data_v1.0/gallery_product/index" search_budget: 100 return_k: 5 dist_type: "IP" + score_thres: 0.5 diff --git a/deploy/configs/inference_vehicle.yaml b/deploy/configs/inference_vehicle.yaml index ecf5f8cc6467087b6168573b874592645cdc9444..17f70abccd725363da58f54494627b13f450cb6a 100644 --- a/deploy/configs/inference_vehicle.yaml +++ b/deploy/configs/inference_vehicle.yaml @@ -1,11 +1,13 @@ Global: - infer_imgs: "./dataset/vehicle_demo_data_v1.0/query/" + infer_imgs: "./recognition_demo_data_v1.0/test_vehicle/" det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" rec_inference_model_dir: "./models/vehicle_cls_ResNet50_CompCars_v1.0_infer/" + rec_nms_thresold: 0.05 + batch_size: 1 image_shape: [3, 640, 640] - threshold: 0.5 - max_det_results: 1 + threshold: 0.2 + max_det_results: 5 labe_list: - foreground @@ -50,7 +52,8 @@ RecPostProcess: null # indexing engine config IndexProcess: - index_path: "./dataset/vehicle_demo_data_v1.0/index/" + index_path: "./recognition_demo_data_v1.0/gallery_vehicle/index/" search_budget: 100 return_k: 5 dist_type: "IP" 
+ score_thres: 0.5 diff --git a/deploy/python/build_gallery.py b/deploy/python/build_gallery.py index c48b3829158f67e2c67a8522f71406542948cc74..2087d9e01b5ae778d9bb4ed46a1169dbca3d83c1 100644 --- a/deploy/python/build_gallery.py +++ b/deploy/python/build_gallery.py @@ -39,10 +39,13 @@ def split_datafile(data_file, image_root, delimiter="\t"): gallery_docs = [] with open(data_file, 'r', encoding='utf-8') as f: lines = f.readlines() - for i, line in enumerate(lines): - line = line.strip().split(delimiter) + for _, ori_line in enumerate(lines): + line = ori_line.strip().split(delimiter) + text_num = len(line) + assert text_num >= 2, f"line({ori_line}) must be splitted into at least 2 parts, but got {text_num}" image_file = os.path.join(image_root, line[0]) - image_doc = line[1] + + image_doc = line[1] gallery_images.append(image_file) gallery_docs.append(image_doc) @@ -57,28 +60,34 @@ class GalleryBuilder(object): assert 'IndexProcess' in config.keys(), "Index config not found ... " self.build(config['IndexProcess']) - def build(self, config): ''' build index from scratch ''' - gallery_images, gallery_docs = split_datafile(config['data_file'], - config['image_root'], config['delimiter']) + gallery_images, gallery_docs = split_datafile( + config['data_file'], config['image_root'], config['delimiter']) # extract gallery features - gallery_features = np.zeros([len(gallery_images), - config['embedding_size']], dtype=np.float32) + gallery_features = np.zeros( + [len(gallery_images), config['embedding_size']], dtype=np.float32) for i, image_file in enumerate(tqdm(gallery_images)): - img = cv2.imread(image_file)[:, :, ::-1] + img = cv2.imread(image_file) + if img is None: + logger.error("img empty, please check {}".format(image_file)) + exit() + img = img[:, :, ::-1] rec_feat = self.rec_predictor.predict(img) - gallery_features[i,:] = rec_feat + gallery_features[i, :] = rec_feat # train index - self.Searcher = Graph_Index(dist_type=config['dist_type']) - self.Searcher.build(gallery_vectors=gallery_features, gallery_docs=gallery_docs, - pq_size=config['pq_size'], index_path=config['index_path']) - + self.Searcher = Graph_Index(dist_type=config['dist_type']) + self.Searcher.build( + gallery_vectors=gallery_features, + gallery_docs=gallery_docs, + pq_size=config['pq_size'], + index_path=config['index_path']) + def main(config): system_builder = GalleryBuilder(config) diff --git a/deploy/python/predict_system.py b/deploy/python/predict_system.py index 63f2befba85f55cfbe583ba3258c0b9bfbe1ca48..3f5d63a812db6d0376abea04ed8242c65638d6bf 100644 --- a/deploy/python/predict_system.py +++ b/deploy/python/predict_system.py @@ -46,24 +46,72 @@ class SystemPredictor(object): dist_type=config['IndexProcess']['dist_type']) self.Searcher.load(config['IndexProcess']['index_path']) + def append_self(self, results, shape): + results.append({ + "class_id": 0, + "score": 1.0, + "bbox": + np.array([0, 0, shape[1], shape[0]]), # xmin, ymin, xmax, ymax + "label_name": "foreground", + }) + return results + + def nms_to_rec_results(self, results, thresh=0.1): + filtered_results = [] + x1 = np.array([r["bbox"][0] for r in results]).astype("float32") + y1 = np.array([r["bbox"][1] for r in results]).astype("float32") + x2 = np.array([r["bbox"][2] for r in results]).astype("float32") + y2 = np.array([r["bbox"][3] for r in results]).astype("float32") + scores = np.array([r["rec_scores"] for r in results]) + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + while order.size > 0: + i = order[0] + xx1 = 
np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + filtered_results.append(results[i]) + + return filtered_results + def predict(self, img): output = [] + # st1: get all detection results results = self.det_predictor.predict(img) + + # st2: add the whole image for recognition to improve recall + results = self.append_self(results, img.shape) + + # st3: recognition process, use score_thres to ensure accuracy for result in results: preds = {} xmin, ymin, xmax, ymax = result["bbox"].astype("int") crop_img = img[ymin:ymax, xmin:xmax, :].copy() rec_results = self.rec_predictor.predict(crop_img) - #preds["feature"] = rec_results preds["bbox"] = [xmin, ymin, xmax, ymax] scores, docs = self.Searcher.search( query=rec_results, return_k=self.return_k, search_budget=self.search_budget) - preds["rec_docs"] = docs - preds["rec_scores"] = scores + # just top-1 result will be returned for the final + if scores[0] >= self.config["IndexProcess"]["score_thres"]: + preds["rec_docs"] = docs[0] + preds["rec_scores"] = scores[0] + output.append(preds) + + # st5: nms to the final results to avoid fetching duplicate results + output = self.nms_to_rec_results( + output, self.config["Global"]["rec_nms_thresold"]) - output.append(preds) return output @@ -75,7 +123,7 @@ def main(config): for idx, image_file in enumerate(image_list): img = cv2.imread(image_file)[:, :, ::-1] output = system_predictor.predict(img) - draw_bbox_results(img[:, :, ::-1], output, image_file) + draw_bbox_results(img, output, image_file) print(output) return diff --git a/deploy/utils/draw_bbox.py b/deploy/utils/draw_bbox.py index 1896f9386a6768c3c7562043b86e68007b3769da..7c8044d1b84a996a4ce4bbe6c62d14a8e15b56ee 100644 --- a/deploy/utils/draw_bbox.py +++ b/deploy/utils/draw_bbox.py @@ -15,18 +15,46 @@ import os import numpy as np import cv2 +from PIL import Image, ImageDraw, ImageFont -def draw_bbox_results(image, results, input_path, save_dir=None): +def draw_bbox_results(image, + results, + input_path, + font_path="./utils/simfang.ttf", + save_dir=None): + if isinstance(image, np.ndarray): + image = Image.fromarray(image) + draw = ImageDraw.Draw(image) + font_size = 18 + font = ImageFont.truetype(font_path, font_size, encoding="utf-8") + + color = (0, 102, 255) + for result in results: - [xmin, ymin, xmax, ymax] = result["bbox"] + # empty results + if result["rec_docs"] is None: + continue + + xmin, ymin, xmax, ymax = result["bbox"] + text = "{}, {:.2f}".format(result["rec_docs"], result["rec_scores"]) + th = font_size + tw = int(len(result["rec_docs"]) * font_size) + 60 + start_y = max(0, ymin - th) - image = cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), - 2) + draw.rectangle( + [(xmin + 1, start_y), (xmin + tw + 1, start_y + th)], fill=color) + + draw.text((xmin + 1, start_y), text, fill=(255, 255, 255), font=font) + + draw.rectangle( + [(xmin, ymin), (xmax, ymax)], outline=(255, 0, 0), width=2) image_name = os.path.basename(input_path) if save_dir is None: save_dir = "output" os.makedirs(save_dir, exist_ok=True) output_path = os.path.join(save_dir, image_name) - cv2.imwrite(output_path, image) + + image.save(output_path, quality=95) + return np.array(image) diff --git a/deploy/utils/simfang.ttf 
b/deploy/utils/simfang.ttf new file mode 100644 index 0000000000000000000000000000000000000000..2b59eae4195d1cdbea375503c0cc34d5631cb0f9 Binary files /dev/null and b/deploy/utils/simfang.ttf differ diff --git a/deploy/vector_search/interface.py b/deploy/vector_search/interface.py index 218adbfe9826be2332f2262d671ab5566c2f2fb1..13e93cadfa6dbea508d0ddf221b091b9d407a1f2 100644 --- a/deploy/vector_search/interface.py +++ b/deploy/vector_search/interface.py @@ -29,7 +29,14 @@ if platform.system() == "Windows": else: lib_filename = "index.so" so_path = os.path.join(__dir__, lib_filename) -lib = ctypes.cdll.LoadLibrary(so_path) +try: + lib = ctypes.cdll.LoadLibrary(so_path) +except Exception as ex: + readme_path = os.path.join(__dir__, "README.md") + print( + f"Error happened when load lib {so_path} with msg {ex},\nplease refer to {readme_path} to rebuild your library." + ) + exit(-1) class IndexContext(Structure): diff --git a/docs/en/tutorials/getting_started_retrieval_en.md b/docs/en/tutorials/getting_started_retrieval_en.md new file mode 100644 index 0000000000000000000000000000000000000000..eea6c1667036ab6eb8c554b6959d8d1cc669e86a --- /dev/null +++ b/docs/en/tutorials/getting_started_retrieval_en.md @@ -0,0 +1,253 @@ +# Quick Start +--- +At first,please take a reference to [Installation Guide](./install_en.md) to prepare your environment. + +PaddleClas image retrieval supports the following training/evaluation environments: +```shell +└── CPU/Single GPU +    ├── Linux +    └── Windows +``` +## Content + +* [1. Data Preparation](#Data-Preparation) +* [2. Training and Evaluation on Single GPU](#Training-and-Evaluation-on-Single-GPU) + * [2.1 Model Training](#Model-Training) + * [2.2 Resume Training](#Resume-Training) + * [2.3 Model Evaluation](#Model-Evaluation) +* [3. Export Inference Model](#Export-Inference-Model) + + +## 1. Data Preparation + +* Go to PaddleClas directory。 + +```bash +## linux or mac, $path_to_PaddleClas indicates the root directory of PaddleClas, which the user needs to modify according to their real directory +cd $path_to_PaddleClas +``` + +* Please go to the `dataset` catalog. In order to quickly experiment the image retrieval module of PaddleClas, the dataset we used is [CUB_200_2011](http://vision.ucsd.edu/sites/default/files/WelinderEtal10_CUB-200.pdf), which is a fine grid dataset with 200 different types of birds. Firstly, we need to download the dataset. For download, please refer to [Official Website](http://www.vision.caltech.edu/visipedia/CUB-200-2011.html). + +```shell +# linux or mac +cd dataset + +# Copy the downloaded data into a directory. +cp {Data storage path}/CUB_200_2011.tgz . + +# Unzip +tar -xzvf CUB_200_2011.tgz + +#go to `CUB_200_2011` +cd CUB_200_2011 +``` + +When using the dataset for image retrieval, we usually use the first 100 classes as the training set, and the last 100 classes as the testing set, so we need to process those data so as to adapt the model training of image retrival. + +```shell +#Create train and test directories +mkdir train && mkdir test + +#Divide data into training set with the first 100 classes and testing set with the last 100 classes. +ls images | awk -F "." '{if(int($1)<101)print "mv images/"$0" train/"int($1)}' | sh +ls images | awk -F "." 
'{if(int($1)>100)print "mv images/"$0" test/"int($1)}' | sh + +#Generate train_list.txt test_list.txt +tree -r -i -f train | grep jpg | awk -F "/" '{print $0" "int($2) " "NR}' > train_list.txt +tree -r -i -f test | grep jpg | awk -F "/" '{print $0" "int($2) " "NR}' > test_list.txt +``` + + +So far, we have the training set (in the `train` catalog) and testing set (in the `test` catalog) of `CUB_200_2011`. +After data preparation, the `train` directory of `CUB_200_2011` should be: + +``` +├── 1 +│   ├── Black_Footed_Albatross_0001_796111.jpg +│   ├── Black_Footed_Albatross_0002_55.jpg + ... +├── 10 +│   ├── Red_Winged_Blackbird_0001_3695.jpg +│   ├── Red_Winged_Blackbird_0005_5636.jpg +... +``` + +`train_list.txt` Should be: + +``` +train/99/Ovenbird_0137_92639.jpg 99 1 +train/99/Ovenbird_0136_92859.jpg 99 2 +train/99/Ovenbird_0135_93168.jpg 99 3 +train/99/Ovenbird_0131_92559.jpg 99 4 +train/99/Ovenbird_0130_92452.jpg 99 5 +... +``` +The separators are shown as spaces, and the meaning of those three columns of data are the directory of training set, labels of training set and unique ids of training set. + +The format of testing set is the same as the one of training set. + + +**Note**: + +* When the gallery dataset and query dataset are the same, in order to remove the first data retrieved (the retrieved images themselves do not need to be evaluated), each data needs to correspond to a unique id for subsequent evaluation of metrics such as mAP, recall@1, etc. Please refer to [Introduction to image retrieval datasets](#Introduction to image retrieval datasets) for the analysis of gallery datasets and query datasets, and [Image retrieval evaluation metrics](#Image retrieval evaluation metrics) for the evaluation of mAP, recall@1, etc. + +Back to `PaddleClas` root directory + +```shell +# linux or mac +cd ../../ +``` + + +## 2. Single GPU-based Training and Evaluation + +For training and evaluation on a single GPU, the `tools/train.py` and `tools/eval.py` scripts are recommended. + + + +### 2.1 Model Training + +Once you have prepared the configuration file, you can start training the image retrieval task in the following way. the method used by PaddleClas to train the image retrieval is metric learning, refering to [metric learning](#Metric-Learning) for an explanation of metric learning. + + +``` +python3 tools/train.py \ + -c ./ppcls/configs/quick_start/MobileNetV1_retrieval.yaml \ + -o Arch.Backbone.pretrained=True \ + -o Global.device=gpu +``` + + `-c` is used to specify the path to the configuration file, and `-o` is used to specify the parameters that need to be modified or added, where `-o Arch.Backbone.pretrained=True` indicates that the Backbone part uses the pre-trained model, in addition, `Arch.Backbone.pretrained` can also specify backbone.`pretrained` can also specify the address of a specific model weight file, which needs to be replaced with the path to your own pre-trained model weight file when using it. `-o Global.device=gpu` indicates that the GPU is used for training. If you want to use a CPU for training, you need to set `Global.device` to `cpu`. + +For more detailed training configuration, you can also modify the corresponding configuration file of the model directly. Refer to the [configuration document](config_en.md) for specific configuration parameters. + +Run the above commands to check the output log, an example is as follows: + + ``` + ... + [Train][Epoch 1/50][Avg]CELoss: 6.59110, TripletLossV2: 0.54044, loss: 7.13154 + ... 
+ [Eval][Epoch 1][Avg]recall1: 0.46962, recall5: 0.75608, mAP: 0.21238 + ... + ``` + +The Backbone here is MobileNetV1, if you want to use other backbone, you can rewrite the parameter `Arch.Backbone.name`, for example by adding `-o Arch.Backbone.name={other Backbone}` to the command. In addition, as the input dimension of the `Neck` section differs between models, replacing a Backbone may require rewriting the input size here in a similar way to replacing the Backbone's name. + +In the Training Loss section, [CELoss](../../../ppcls/loss/celoss.py) and [TripletLossV2](../../../ppcls/loss/triplet.py) is used here with the following configuration files. + +``` +Loss: + Train: + - CELoss: + weight: 1.0 + - TripletLossV2: + weight: 1.0 + margin: 0.5 +``` + +The final total Loss is a weighted sum of all Losses, where weight defines the weight of a particular Loss in the final total. If you want to replace other Losses, you can also change the Loss field in the configuration file, for the currently supported Losses please refer to [Loss](../../../ppcls/loss). + + +### 2.2 Resume Training + +If the training task is terminated for some reasons, it can be recovered by loading the checkpoints weights file and continue training. + + +``` +python3 tools/train.py \ + -c ./ppcls/configs/quick_start/MobileNetV1_retrieval.yaml \ + -o Global.checkpoints="./output/RecModel/epoch_5" \ + -o Global.device=gpu +``` + +There is no need to modify the configuration file, just set the `Global.checkpoints` parameter when continuing training, indicating the path to the loaded breakpoint weights file, using this parameter will load both the saved checkpoints weights and information about the learning rate, optimizer, etc. + +**Note**: + +* The `-o Global.checkpoints` parameter need not contain the suffix name of the checkpoint weights file, the above training command will generate the breakpoint weights file as shown below during training, if you want to continue training from breakpoint `5` then the `Global.checkpoints` parameter just needs to be set to `". /output/RecModel/epoch_5"` and PaddleClas will automatically supplement the suffix name. + + ```shell + output/ + └── RecModel + ├── best_model.pdopt + ├── best_model.pdparams + ├── best_model.pdstates + ├── epoch_1.pdopt + ├── epoch_1.pdparams + ├── epoch_1.pdstates + . + . + . + ``` + + +### 2.3 Model Evaluation + +Model evaluation can be carried out with the following commands. + +```bash +python3 tools/eval.py \ + -c ./ppcls/configs/quick_start/MobileNetV1_retrieval.yaml \ + -o Global.pretrained_model=./output/RecModel/best_model +``` + +The above command will use `. /configs/quick_start/MobileNetV1_retrieval.yaml` as a configuration file to evaluate the model obtained from the above training `. /output/RecModel/best_model` for evaluation. You can also set up the evaluation by changing the parameters in the configuration file, or you can update the configuration with the `-o` parameter, as shown above. + +Some of the configurable evaluation parameters are introduced as follows. +* `Arch.name`: the name of the model +* `Global.pretrained_model`: path to the pre-trained model file of the model to be evaluated, unlike `Global.Backbone.pretrained` where the pre-trained model is the weight of the whole model, whereas `Global.Backbone.pretrained` is only the Backbone.`pretrained` is only the weight of the Backbone part. When it is time to do model evaluation, the weights of the whole model need to be loaded. 
+* `Metric.Eval`: the metric to be evaluated, by default evaluates recall@1, recall@5, mAP. when you are not going to evaluate a metric, you can remove the corresponding trial marker from the configuration file; when you want to add a certain evaluation metric, you can also refer to [Metric](../../../ppcls/metric/metrics.py) section to add the relevant metric to the configuration file `Metric.Eval`. + +**Note:** + +* When loading the model to be evaluated, the path to the model file needs to be specified, but it is not necessary to include the file suffix, PaddleClas will automatically complete the `.pdparams` suffix, e.g. [2.2 Resume Training](#Resume-Training). + +* Metric learning are generally not evaluated for TopkAcc. + + +## 3. Export Inference Model + +By exporting the inference model, PaddlePaddle supports the transformation of the trained model using prediction with inference engine. + +```bash +python3 tools/export_model.py \ + -c ./ppcls/configs/quick_start/MobileNetV1_retrieval.yaml \ + -o Global.pretrained_model=output/RecModel/best_model \ + -o Global.save_inference_dir=./inference +``` + + `Global.pretrained_model` is used to specify the model file path, which still does not need to contain the model file suffix (e.g. [2.2 Model recovery training](#Model recovery training)). When executed, it will generate the `. /inference` directory, which contains the `inference.pdiparams`, `inference.pdiparams.info`, and `inference.pdmodel` files. `Global.save_inference_dir` allows you to specify the path to export the inference model. The inference model saved here is truncated at the embedding feature level, i.e. the final output of the model is n-dimensional embedding features. + +The above command will generate the model structure file (`inference.pdmodel`) and the model weights file (`inference.pdiparams`), which can then be used for inference using the inference engine. The process of inference using the inference model can be found in [Predictive inference based on the Python prediction engine](@shengyu). + +## Basic knowledge + +Image retrieval refers to a query image given a specific instance (e.g. a specific target, scene, item, etc.) that contains the same instance from a database image. Unlike image classification, image retrieval solves an open set problem where the training set may not contain the class of the image being recognised. The overall process of image retrieval is: firstly, the images are represented in a suitable feature vector, secondly, a nearest neighbour search is performed on these image feature vectors using Euclidean or Cosine distances to find similar images in the base, and finally, some post-processing techniques can be used to fine-tune the retrieval results and determine information such as the category of the image being recognised. Therefore, the key to determining the performance of an image retrieval algorithm lies in the goodness of the feature vectors corresponding to the images. + + +- Metric Learning + +Metric learning studies how to learn a distance function on a particular task so that the distance function can help nearest-neighbour based algorithms (kNN, k-means, etc.) to achieve better performance. Deep Metric Learning is a method of metric learning that aims to learn a mapping from the original features to a low-dimensional dense vector space (embedding space) such that similar objects on the embedding space are closer together using commonly used distance functions (Euclidean distance, cosine distance, etc.) 
) on the embedding space, while the distances between objects of different classes are relatively close to each other. Deep metric learning has achieved very successful applications in the field of computer vision, such as face recognition, commodity recognition, image retrieval, pedestrian re-identification, etc. + + +- Introduction to image retrieval datasets + + - Training Dataset: used to train the model so that it can learn the image features of the collection. + - Gallery Dataset: used to provide the gallery data for the image retrieval task. The gallery dataset can be the same as the training set or the test set, or different. + - Test Set (Query Dataset): used to test the goodness of the model, usually each test image in the test set is extracted with features, and then matched with the features of the underlying data to obtain recognition results, and then the metrics of the whole test set are calculated based on the recognition results. + + +- Image Retrieval Evaluation Metrics + + + - recall:indicates the number of predicted positive cases with positive labels / the number of cases with positive labels + + - recall@1:Number of predicted positive cases in top-1 with positive label / Number of cases with positive label + - recall@5:Number of all predicted positive cases in top-5 retrieved with positive label / Number of cases with positive label + + + - mean Average Precision(mAP) + + - AP: AP refers to the average precision on different recall rates + - mAP: Average of the APs for all images in the test set diff --git a/docs/en/tutorials/quick_start_recognition_en.md b/docs/en/tutorials/quick_start_recognition_en.md index fdd14589c5325f277fbc492f5406402a9d0d36be..7d9925ceb002631a1f3907e6e2b9b2104d3daac2 100644 --- a/docs/en/tutorials/quick_start_recognition_en.md +++ b/docs/en/tutorials/quick_start_recognition_en.md @@ -34,32 +34,43 @@ If the image category already exists in the image index database, then you can t The detection model with the recognition inference model for the 4 directions (Logo, Cartoon Face, Vehicle, Product), the address for downloading the test data and the address of the corresponding configuration file are as follows. 
-| Models Introduction | Recommended Scenarios | Test Data Address | inference Model | Predict Config File | Config File to Build Index Database | -| ------------ | ------------- | ------- | -------- | ------- | -------- | -| Generic mainbody detection model | General Scenarios | - |[Model Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) | - | - | -| Logo Recognition Model | Logo Scenario | [Data Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/logo_demo_data_v1.0.tar) | [Model Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) | [inference_logo.yaml](../../../deploy/configs/inference_logo.yaml) | [build_logo.yaml](../../../deploy/configs/build_logo.yaml) | -| Cartoon Face Recognition Model| Cartoon Face Scenario | [Data Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/cartoon_demo_data_v1.0.tar) | [Model Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) | -| Vehicle Subclassification Model | Vehicle Scenario | [Data Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/vehicle_demo_data_v1.0.tar) | [Model Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) | [inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) | -| Product Recignition Model | Product Scenario | [Data Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar) | [Model Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_Inshop_v1.0_infer.tar) | [inference_inshop.yaml](../../../deploy/configs/) | [build_inshop.yaml](../../../deploy/configs/build_inshop.yaml) | +| Models Introduction | Recommended Scenarios | inference Model | Predict Config File | Config File to Build Index Database | +| ------------ | ------------- | -------- | ------- | -------- | +| Generic mainbody detection model | General Scenarios |[Model Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) | - | - | +| Logo Recognition Model | Logo Scenario | [Model Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) | [inference_logo.yaml](../../../deploy/configs/inference_logo.yaml) | [build_logo.yaml](../../../deploy/configs/build_logo.yaml) | +| Cartoon Face Recognition Model| Cartoon Face Scenario | [Model Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) | +| Vehicle Subclassification Model | Vehicle Scenario | [Model Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) | 
[inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) | +| Product Recignition Model | Product Scenario | [Model Download Link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_Inshop_v1.0_infer.tar) | [inference_inshop.yaml](../../../deploy/configs/) | [build_inshop.yaml](../../../deploy/configs/build_inshop.yaml) | -**Attention**:If you do not have wget installed on Windows, you can download the model by copying the link into your browser and unzipping it in the appropriate folder; for Linux or macOS users, you can right-click and copy the download link to download it via the `wget` command. +Demo data in this tutorial can be downloaded here: [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/recognition_demo_data_v1.0.tar). -* You can download and unzip the data and models by following the command below +**Attention** +1. If you do not have wget installed on Windows, you can download the model by copying the link into your browser and unzipping it in the appropriate folder; for Linux or macOS users, you can right-click and copy the download link to download it via the `wget` command. +2. If you want to install `wget` on macOS, you can run the following command. ```shell -mkdir dataset -cd dataset -# Download the demo data and unzip -wget {Data download link} && tar -xf {Name of the tar archive} -cd .. +# install homebrew +ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"; +# install wget +brew install wget +``` + +3. If you want to isntall `wget` on Windows, you can refer to [link](https://www.cnblogs.com/jeshy/p/10518062.html). If you want to install `tar` on Windows, you can refer to [link](https://www.cnblogs.com/chooperman/p/14190107.html). + + +* You can download and unzip the data and models by following the command below +```shell mkdir models cd models # Download and unzip the inference model wget {Models download link} && tar -xf {Name of the tar archive} cd .. + +# Download the demo data and unzip +wget {Data download link} && tar -xf {Name of the tar archive} ``` @@ -75,27 +86,28 @@ cd models wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar && tar -xf ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar # Download and unpack the inference model wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar && tar -xf product_ResNet50_vd_aliproduct_v1.0_infer.tar - cd .. -mkdir dataset -cd dataset + # Download the demo data and unzip it -wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar && tar -xf product_demo_data_v1.0.tar -cd .. +wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/recognition_demo_data_v1.0.tar && tar -xf recognition_demo_data_v1.0.tar ``` -Once unpacked, the `dataset` folder should have the following file structure. +Once unpacked, the `recognition_demo_data_v1.0` folder should have the following file structure. ``` -├── product_demo_data_v1.0 -│ ├── data_file.txt -│ ├── gallery -│ ├── index -│ └── query +├── recognition_demo_data_v1.0 +│ ├── gallery_cartoon +│ ├── gallery_logo +│ ├── gallery_product +│ ├── gallery_vehicle +│ ├── test_cartoon +│ ├── test_logo +│ ├── test_product +│ └── test_vehicle ├── ... 
```

-The `data_file.txt` is images list used to build the index database, the `gallery` folder contains all the original images used to build the index database, the `index` folder contains the index files generated by building the index database, and the `query` is the demo image used to test the recognition effect.
+Here, the original images used to build the index database are in the `gallery_xxx` folders, and the test images are in the `test_xxx` folders. You can also browse the specific folders for more details.

The `models` folder should have the following file structure.

@@ -119,33 +131,39 @@ Take the product recognition demo as an example to show the recognition and retr

#### 2.2.1 Single Image Recognition

-Run the following command to identify and retrieve the image `./dataset/product_demo_data_v1.0/query/wangzai.jpg` for recognition and retrieval
+Run the following command to recognize and retrieve the image `./recognition_demo_data_v1.0/test_product/daoxiangcunjinzhubing_6.jpg`:

```shell
+# use the following command to predict using GPU
+python3.7 python/predict_system.py -c configs/inference_product.yaml
+# use the following command to predict using CPU
-python3.7 python/predict_system.py -c configs/inference_product.yaml
+python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.use_gpu=False
```

+**Note:** The library used to build the index is compiled on our machine. If an error occurs because of your environment, you can refer to the [vector search tutorial](../../../deploy/vector_search/README.md) to rebuild the library.
+
+
The image to be retrieved is shown below.

<div align="center">
-<img ... />
+<img src="../../images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg" width="400" />
</div>
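
One detail worth calling out before looking at the output: as the patch to `deploy/python/predict_system.py` above shows, the predictor now appends the whole image as one extra candidate region before feature extraction, so a product that fills the frame can still be matched even if the detector returns nothing. A minimal sketch of that step:

```python
import numpy as np

def append_self(results, shape):
    # Mirrors SystemPredictor.append_self in the patch above: add the full
    # image as an extra "foreground" candidate with maximum confidence.
    results.append({
        "class_id": 0,
        "score": 1.0,
        "bbox": np.array([0, 0, shape[1], shape[0]]),  # xmin, ymin, xmax, ymax
        "label_name": "foreground",
    })
    return results

# For a 480x640 BGR image, img.shape == (480, 640, 3):
print(append_self([], (480, 640, 3))[0]["bbox"])  # [  0   0 640 480]
```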
The final output is shown below.

```
-[{'bbox': [305, 226, 776, 930], 'rec_docs': ['旺仔牛奶', '旺仔牛奶', '旺仔牛奶', '旺仔牛奶', '康师傅方便面'], 'rec_scores': array([1328.1072998 , 1185.92248535, 846.88220215, 746.28546143 622.2668457 ])}
+[{'bbox': [287, 129, 497, 326], 'rec_docs': '稻香村金猪饼', 'rec_scores': 0.8309420943260193}, {'bbox': [99, 242, 313, 426], 'rec_docs': '稻香村金猪饼', 'rec_scores': 0.7245652079582214}]
```

-where bbox indicates the location of the detected subject, rec_docs indicates the labels corresponding to a number of images in the index dabase that are most similar to the detected subject, and rec_scores indicates the corresponding similarity.
-There are 4 `旺仔牛奶` results in 5, the recognition result is correct.
+where bbox indicates the location of the detected object, rec_docs indicates the label of the most similar entry in the index database, and rec_scores indicates the corresponding confidence.
+
-The detection result is also saved in the folder `output`, which is shown as follows.
+The detection result is also saved in the folder `output`; for this image, the visualization result is as follows.

<div align="center">
-<img ... />
+<img src="../../images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg" width="400" />
</div>
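
The single-label, single-score entries above come from the post-processing the patch adds to `deploy/python/predict_system.py`: for each box only the top-1 gallery match is kept, matches scoring below `IndexProcess.score_thres` are dropped, and overlapping boxes are de-duplicated with greedy NMS controlled by `Global.rec_nms_thresold`. The sketch below condenses that filtering into one self-contained function; the demo input is invented to show a suppression.

```python
import numpy as np

def post_process(preds, score_thres=0.5, nms_thres=0.05):
    # Condensed from SystemPredictor.predict / nms_to_rec_results above:
    # drop low-confidence matches, then greedy IoU-based NMS on the rest.
    preds = [p for p in preds if p["rec_scores"] >= score_thres]
    if not preds:
        return []
    boxes = np.array([p["bbox"] for p in preds], dtype="float32")
    scores = np.array([p["rec_scores"] for p in preds])
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    order = scores.argsort()[::-1]
    kept = []
    while order.size > 0:
        i = order[0]
        kept.append(preds[i])
        # IoU of the highest-scoring box with every remaining box
        x1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
        y1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
        x2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
        y2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
        inter = np.maximum(0.0, x2 - x1 + 1) * np.maximum(0.0, y2 - y1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= nms_thres]
    return kept

demo = [
    {"bbox": [287, 129, 497, 326], "rec_docs": "稻香村金猪饼", "rec_scores": 0.83},
    {"bbox": [290, 132, 500, 330], "rec_docs": "稻香村金猪饼", "rec_scores": 0.72},
    {"bbox": [10, 10, 50, 50], "rec_docs": "稻香村金猪饼", "rec_scores": 0.30},
]
print(post_process(demo))  # 0.30 fails score_thres; the 0.72 box is suppressed by NMS
```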
@@ -155,34 +173,49 @@ The detection result is also saved in the folder `output`, which is shown as fol


If you want to predict the images in the folder, you can directly modify the `Global.infer_imgs` field in the configuration file, or you can also modify the corresponding configuration through the following `-o` parameter.

```shell
-python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/"
+# use the following command to predict using GPU; append `-o Global.use_gpu=False` to predict using CPU
+python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/"
```
+
+The results on the screen are shown below.
+
+```
+...
+[{'bbox': [37, 29, 123, 89], 'rec_docs': '香奈儿包', 'rec_scores': 0.6163763999938965}, {'bbox': [153, 96, 235, 175], 'rec_docs': '香奈儿包', 'rec_scores': 0.5279821157455444}]
+[{'bbox': [735, 562, 1133, 851], 'rec_docs': '香奈儿包', 'rec_scores': 0.5588355660438538}]
+[{'bbox': [124, 50, 230, 129], 'rec_docs': '香奈儿包', 'rec_scores': 0.6980369687080383}]
+[{'bbox': [0, 0, 275, 183], 'rec_docs': '香奈儿包', 'rec_scores': 0.5818190574645996}]
+[{'bbox': [400, 1179, 905, 1537], 'rec_docs': '香奈儿包', 'rec_scores': 0.9814301133155823}]
+[{'bbox': [544, 4, 1482, 932], 'rec_docs': '香奈儿包', 'rec_scores': 0.5143815279006958}]
+[{'bbox': [29, 42, 194, 183], 'rec_docs': '香奈儿包', 'rec_scores': 0.9543638229370117}]
+...
+```
+
+All the visualization results are also saved in the folder `output`.
+
+
Furthermore, the recognition inference model path can be changed by modifying the `Global.rec_inference_model_dir` field, and the path of the index database can be changed by modifying the `IndexProcess.index_path` field.

## 3. Recognize Images of Unknown Category

-To recognize the image `./dataset/product_demo_data_v1.0/query/anmuxi.jpg`, run the command as follows:
+To recognize the image `./recognition_demo_data_v1.0/test_product/anmuxi.jpg`, run the command as follows:

```shell
-python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/anmuxi.jpg"
+python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/anmuxi.jpg"
```

The image to be retrieved is shown below.

<div align="center">
-<img src="../../images/recognition/product_demo/anmuxi.jpg" width="400" />
+<img src="../../images/recognition/product_demo/query/anmuxi.jpg" width="400" />
</div>
-The output is as follows:
-
-```
-[{'bbox': [243, 80, 523, 522], 'rec_docs': ['娃哈哈AD钙奶', '旺仔牛奶', '娃哈哈AD钙奶', '农夫山泉矿泉水', '红牛'], 'rec_scores': array([548.33282471, 411.85687256, 408.39770508, 400.89404297, 360.41540527])}]
-```
+The output is empty.

-Since the index infomation is not included in the corresponding index databse, the recognition results are not proper. At this time, we can complete the image recognition of unknown categories by constructing a new index database.
+Since the index information for this product is not included in the index database, the recognition result is empty or incorrect. In this case, we can recognize images of unknown categories by constructing a new index database.

When the index database cannot cover the scenes we actually recognise, i.e. when predicting images of unknown categories, we need to add similar images of the corresponding categories to the index database, thus completing the recognition of images of unknown categories, which does not require retraining.

@@ -192,28 +225,28 @@ When the index database cannot cover the scenes we actually recognise, i.e. when

First, you need to copy the images which are similar to the query image into the original image folder for the index database. The command is as follows.

```shell
-cp -r ../docs/images/recognition/product_demo/gallery/anmuxi ./dataset/product_demo_data_v1.0/gallery/
+cp -r ../docs/images/recognition/product_demo/gallery/anmuxi ./recognition_demo_data_v1.0/gallery_product/gallery/
```

Then you need to create a new label file which records the image path and label information. Use the following command to create a new file based on the original one.

```shell
# copy the file
-cp dataset/product_demo_data_v1.0/data_file.txt dataset/product_demo_data_v1.0/data_file_update.txt
+cp recognition_demo_data_v1.0/gallery_product/data_file.txt recognition_demo_data_v1.0/gallery_product/data_file_update.txt
```

Then add some new lines into the new label file, as shown below.

```
-gallery/anmuxi/001.jpg 安慕希酸奶
-gallery/anmuxi/002.jpg 安慕希酸奶
-gallery/anmuxi/003.jpg 安慕希酸奶
-gallery/anmuxi/004.jpg 安慕希酸奶
-gallery/anmuxi/005.jpg 安慕希酸奶
-gallery/anmuxi/006.jpg 安慕希酸奶
+gallery/anmuxi/001.jpg	安慕希酸奶
+gallery/anmuxi/002.jpg	安慕希酸奶
+gallery/anmuxi/003.jpg	安慕希酸奶
+gallery/anmuxi/004.jpg	安慕希酸奶
+gallery/anmuxi/005.jpg	安慕希酸奶
+gallery/anmuxi/006.jpg	安慕希酸奶
```

-Each line can be splited into two fields. The first field denotes the relative image path, and the second field denotes its label. The `delimiter` is `space` here.
+Each line can be split into two fields. The first field denotes the relative image path, and the second field denotes its label. The `delimiter` is a `tab` here.

@@ -222,25 +255,30 @@ Each line can be splited into two fields. The first field denotes the relative i

Use the following command to build the index to accelerate the retrieval process after recognition.

```shell
-python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess.data_file="./dataset/product_demo_data_v1.0/data_file_update.txt" -o IndexProcess.index_path="./dataset/product_demo_data_v1.0/index_update"
+python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess.data_file="./recognition_demo_data_v1.0/gallery_product/data_file_update.txt" -o IndexProcess.index_path="./recognition_demo_data_v1.0/gallery_product/index_update"
```

-Finally, the new index information is stored in the folder`./dataset/product_demo_data_v1.0/index_update`. Use the new index database for the above index.
+Finally, the new index information is stored in the folder `./recognition_demo_data_v1.0/gallery_product/index_update`. Use the new index database for the retrieval above.


### 3.2 Recognize the Unknown Category Images

-To recognize the image `./dataset/product_demo_data_v1.0/query/anmuxi.jpg`, run the command as follows.
+To recognize the image `./recognition_demo_data_v1.0/test_product/anmuxi.jpg`, run the command as follows.

```shell
-python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/anmuxi.jpg" -o IndexProcess.index_path="./dataset/product_demo_data_v1.0/index_update"
+# use the following command to predict using GPU; append `-o Global.use_gpu=False` to predict using CPU
+python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/anmuxi.jpg" -o IndexProcess.index_path="./recognition_demo_data_v1.0/gallery_product/index_update"
```

The output is as follows:

```
-[{'bbox': [243, 80, 523, 522], 'rec_docs': ['安慕希酸奶', '娃哈哈AD钙奶', '安慕希酸奶', '安慕希酸奶', '安慕希酸奶'], 'rec_scores': array([1214.9597168 , 548.33282471, 547.82104492, 535.13201904, 471.52706909])}]
+[{'bbox': [243, 80, 523, 522], 'rec_docs': '安慕希酸奶', 'rec_scores': 0.5570770502090454}]
```

-There are 4 `安慕希酸奶` results in 5, the recognition result is correct.
+The final recognition result is `安慕希酸奶`, which is correct; the visualization result is as follows.
+
+<div align="center">
+<img src="../../images/recognition/product_demo/result/anmuxi.jpg" width="400" />
+</div>
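
A practical footnote to section 3.1 above: `build_gallery.py` now asserts that every line of the data file splits into at least two fields on the configured delimiter, and all demo configs switched that delimiter to `\t`. Some editors silently convert tabs to spaces, which is exactly the failure that assert catches first. A small helper along the following lines (hypothetical, not part of the repo) appends the new gallery entries with a literal tab and sidesteps the editor problem:

```python
# Hypothetical convenience script: append the six anmuxi gallery images to
# data_file_update.txt with a real TAB delimiter, as build_gallery.py expects.
entries = [(f"gallery/anmuxi/{i:03d}.jpg", "安慕希酸奶") for i in range(1, 7)]
with open("recognition_demo_data_v1.0/gallery_product/data_file_update.txt",
          "a", encoding="utf-8") as f:
    for path, label in entries:
        f.write(f"{path}\t{label}\n")
```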
diff --git a/docs/images/recognition/product_demo/anmuxi.jpg b/docs/images/recognition/product_demo/query/anmuxi.jpg similarity index 100% rename from docs/images/recognition/product_demo/anmuxi.jpg rename to docs/images/recognition/product_demo/query/anmuxi.jpg diff --git a/docs/images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg b/docs/images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fc64a9531db0829d42b51e888361fa697afd080f Binary files /dev/null and b/docs/images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg differ diff --git a/docs/images/recognition/product_demo/result/anmuxi.jpg b/docs/images/recognition/product_demo/result/anmuxi.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4b219e2b8ee9676a772ac90f1b92e46fcc55dd11 Binary files /dev/null and b/docs/images/recognition/product_demo/result/anmuxi.jpg differ diff --git a/docs/images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg b/docs/images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..645ad6c872b1ea504eed6da3bb89d17229e29f02 Binary files /dev/null and b/docs/images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg differ diff --git a/docs/zh_CN/tutorials/quick_start_recognition.md b/docs/zh_CN/tutorials/quick_start_recognition.md index d2c3ed145f116e757ff3094dc914448df4d2f390..662b95d26a6ea7dba60a105ed6da754cd4b85888 100644 --- a/docs/zh_CN/tutorials/quick_start_recognition.md +++ b/docs/zh_CN/tutorials/quick_start_recognition.md @@ -34,13 +34,16 @@ 检测模型与4个方向(Logo、动漫人物、车辆、商品)的识别inference模型、测试数据下载地址以及对应的配置文件地址如下。 -| 模型简介 | 推荐场景 | 测试数据地址 | inference模型 | 预测配置文件 | 构建索引库的配置文件 | -| ------------ | ------------- | ------- | -------- | ------- | -------- | -| 通用主体检测模型 | 通用场景 | - |[模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) | - | - | -| Logo识别模型 | Logo场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/logo_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) | [inference_logo.yaml](../../../deploy/configs/inference_logo.yaml) | [build_logo.yaml](../../../deploy/configs/build_logo.yaml) | -| 动漫人物识别模型 | 动漫人物场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/cartoon_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) | -| 车辆细分类模型 | 车辆场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/vehicle_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) | [inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) | -| 商品识别模型 | 商品场景 | [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar) | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar) | 
[inference_product.yaml](../../../deploy/configs/inference_product.yaml) | [build_product.yaml](../../../deploy/configs/build_product.yaml) | +| 模型简介 | 推荐场景 | inference模型 | 预测配置文件 | 构建索引库的配置文件 | +| ------------ | ------------- | -------- | ------- | -------- | +| 通用主体检测模型 | 通用场景 |[模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) | - | - | +| Logo识别模型 | Logo场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) | [inference_logo.yaml](../../../deploy/configs/inference_logo.yaml) | [build_logo.yaml](../../../deploy/configs/build_logo.yaml) | +| 动漫人物识别模型 | 动漫人物场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) | +| 车辆细分类模型 | 车辆场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) | [inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) | +| 商品识别模型 | 商品场景 | [模型下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar) | [inference_product.yaml](../../../deploy/configs/inference_product.yaml) | [build_product.yaml](../../../deploy/configs/build_product.yaml) | + + +本章节demo数据下载地址如下: [数据下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/recognition_demo_data_v1.0.tar)。 **注意** @@ -60,32 +63,23 @@ brew install wget * 可以按照下面的命令下载并解压数据与模型 ```shell -mkdir dataset -cd dataset -# 下载demo数据并解压 -wget {数据下载链接地址} && tar -xf {压缩包的名称} -cd .. - mkdir models cd models # 下载识别inference模型并解压 wget {模型下载链接地址} && tar -xf {压缩包的名称} cd .. + +# 下载demo数据并解压 +wget {数据下载链接地址} && tar -xf {压缩包的名称} ``` ### 2.1 下载、解压inference 模型与demo数据 -以商品识别为例,下载通用检测、识别模型以及商品识别demo数据,命令如下。 +以商品识别为例,下载demo数据集以及通用检测、识别模型,命令如下。 ```shell -mkdir dataset -cd dataset -# 下载demo数据并解压 -wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar && tar -xf product_demo_data_v1.0.tar -cd .. - mkdir models cd models # 下载通用检测inference模型并解压 @@ -93,21 +87,27 @@ wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/infere # 下载识别inference模型并解压 wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar && tar -xf product_ResNet50_vd_aliproduct_v1.0_infer.tar -cd .. +# 下载demo数据并解压 +wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/recognition_demo_data_v1.0.tar && tar -xf recognition_demo_data_v1.0.tar ``` -解压完毕后,`dataset`文件夹下应有如下文件结构: +解压完毕后,`recognition_demo_data_v1.0`文件夹下应有如下文件结构: ``` -├── product_demo_data_v1.0 -│ ├── data_file.txt -│ ├── gallery -│ ├── index -│ └── query +├── recognition_demo_data_v1.0 +│ ├── gallery_cartoon +│ ├── gallery_logo +│ ├── gallery_product +│ ├── gallery_vehicle +│ ├── test_cartoon +│ ├── test_logo +│ ├── test_product +│ └── test_vehicle ├── ... ``` -其中`data_file.txt`是用于构建索引库的图像列表文件,`gallery`文件夹中是所有用于构建索引库的图像原始文件,`index`文件夹中是构建索引库生成的索引文件,`query`是用来测试识别效果的demo图像。 +其中`gallery_xxx`文件夹中存放的是用于构建索引库的原始图像,`test_xxx`文件夹中存放的是用于测试识别效果的图像列表。 + `models`文件夹下应有如下文件结构: @@ -131,7 +131,7 @@ cd .. 
#### 2.2.1 识别单张图像 -运行下面的命令,对图像`./dataset/product_demo_data_v1.0/query/wangzai.jpg`进行识别与检索 +运行下面的命令,对图像`./recognition_demo_data_v1.0/test_product/daoxiangcunjinzhubing_6.jpg`进行识别与检索 ```shell # 使用下面的命令使用GPU进行预测 @@ -140,29 +140,30 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.use_gpu=False ``` +注意:这里使用了默认编译生成的库文件进行特征索引,如果与您的环境不兼容,导致程序报错,可以参考[向量检索教程](../../../deploy/vector_search/README.md)重新编译库文件。 + 待检索图像如下所示。
-<img ... />
+<img src="../../images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg" width="400" />
</div>
最终输出结果如下。 ``` -[{'bbox': [305, 226, 776, 930], 'rec_docs': ['旺仔牛奶', '旺仔牛奶', '旺仔牛奶', '旺仔牛奶', '康师傅方便面'], 'rec_scores': array([1328.1072998 , 1185.92248535, 846.88220215, 746.28546143 622.2668457 ])} +[{'bbox': [287, 129, 497, 326], 'rec_docs': '稻香村金猪饼', 'rec_scores': 0.8309420943260193}, {'bbox': [99, 242, 313, 426], 'rec_docs': '稻香村金猪饼', 'rec_scores': 0.7245652079582214}] ``` -其中bbox表示检测出的主体所在位置,rec_docs表示索引库中与检出主体最相近的若干张图像对应的标签,rec_scores表示对应的相似度。由rec_docs字段可以看出,返回的5个结果中,有4个为`旺仔牛奶`,识别正确。 +其中bbox表示检测出的主体所在位置,rec_docs表示索引库中与检测框最为相似的类别,rec_scores表示对应的置信度。 -检测的可视化结果也保存在`output`文件夹下。 +检测的可视化结果也保存在`output`文件夹下,对于本张图像,识别结果可视化如下所示。
-<img ... />
+<img src="../../images/recognition/product_demo/result/daoxiangcunjinzhubing_6.jpg" width="400" />
</div>
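
For reference, the reworked `deploy/utils/draw_bbox.py` above switched from OpenCV to PIL precisely so that Chinese labels such as `稻香村金猪饼` render correctly, using the bundled `simfang.ttf` font. A condensed sketch of the drawing step for a single result:

```python
import numpy as np
from PIL import Image, ImageDraw, ImageFont

def draw_one_result(image, result, font_path="./utils/simfang.ttf"):
    # Condensed from draw_bbox_results above: a filled banner with the label
    # text, then the detection box itself in red.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    draw = ImageDraw.Draw(image)
    font_size = 18
    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
    xmin, ymin, xmax, ymax = result["bbox"]
    text = "{}, {:.2f}".format(result["rec_docs"], result["rec_scores"])
    tw = int(len(result["rec_docs"]) * font_size) + 60
    start_y = max(0, ymin - font_size)
    draw.rectangle([(xmin + 1, start_y), (xmin + tw + 1, start_y + font_size)],
                   fill=(0, 102, 255))
    draw.text((xmin + 1, start_y), text, fill=(255, 255, 255), font=font)
    draw.rectangle([(xmin, ymin), (xmax, ymax)], outline=(255, 0, 0), width=2)
    return image
```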
- #### 2.2.2 基于文件夹的批量识别 @@ -170,34 +171,47 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.u ```shell # 使用下面的命令使用GPU进行预测,如果希望使用CPU预测,可以在命令后面添加-o Global.use_gpu=False -python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/" +python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/" ``` +终端中会输出该文件夹内所有图像的识别结果,如下所示。 + +``` +... +[{'bbox': [37, 29, 123, 89], 'rec_docs': '香奈儿包', 'rec_scores': 0.6163763999938965}, {'bbox': [153, 96, 235, 175], 'rec_docs': '香奈儿包', 'rec_scores': 0.5279821157455444}] +[{'bbox': [735, 562, 1133, 851], 'rec_docs': '香奈儿包', 'rec_scores': 0.5588355660438538}] +[{'bbox': [124, 50, 230, 129], 'rec_docs': '香奈儿包', 'rec_scores': 0.6980369687080383}] +[{'bbox': [0, 0, 275, 183], 'rec_docs': '香奈儿包', 'rec_scores': 0.5818190574645996}] +[{'bbox': [400, 1179, 905, 1537], 'rec_docs': '香奈儿包', 'rec_scores': 0.9814301133155823}] +[{'bbox': [544, 4, 1482, 932], 'rec_docs': '香奈儿包', 'rec_scores': 0.5143815279006958}] +[{'bbox': [29, 42, 194, 183], 'rec_docs': '香奈儿包', 'rec_scores': 0.9543638229370117}] +... +``` + +所有图像的识别结果可视化图像也保存在`output`文件夹内。 + + 更多地,可以通过修改`Global.rec_inference_model_dir`字段来更改识别inference模型的路径,通过修改`IndexProcess.index_path`字段来更改索引库索引的路径。 ## 3. 未知类别的图像识别体验 -对图像`./dataset/product_demo_data_v1.0/query/anmuxi.jpg`进行识别,命令如下 +对图像`./recognition_demo_data_v1.0/test_product/anmuxi.jpg`进行识别,命令如下 ```shell # 使用下面的命令使用GPU进行预测,如果希望使用CPU预测,可以在命令后面添加-o Global.use_gpu=False -python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/anmuxi.jpg" +python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/anmuxi.jpg" ``` 待检索图像如下所示。
-<img src="../../images/recognition/product_demo/anmuxi.jpg" width="400" />
+<img src="../../images/recognition/product_demo/query/anmuxi.jpg" width="400" />
</div>
-输出结果如下 - -``` -[{'bbox': [243, 80, 523, 522], 'rec_docs': ['娃哈哈AD钙奶', '旺仔牛奶', '娃哈哈AD钙奶', '农夫山泉矿泉水', '红牛'], 'rec_scores': array([548.33282471, 411.85687256, 408.39770508, 400.89404297, 360.41540527])}] -``` +输出结果为空。 由于默认的索引库中不包含对应的索引信息,所以这里的识别结果有误,此时我们可以通过构建新的索引库的方式,完成未知类别的图像识别。 @@ -206,31 +220,31 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.i ### 3.1 准备新的数据与标签 -首先需要将与待检索图像相似的图像列表拷贝到索引库原始图像的文件夹(`./dataset/product_demo_data_v1.0.0/gallery`)中,运行下面的命令拷贝相似图像。 +首先需要将与待检索图像相似的图像列表拷贝到索引库原始图像的文件夹(`./recognition_demo_data_v1.0/gallery_product/gallery`)中,运行下面的命令拷贝相似图像。 ```shell -cp -r ../docs/images/recognition/product_demo/gallery/anmuxi ./dataset/product_demo_data_v1.0/gallery/ +cp -r ../docs/images/recognition/product_demo/gallery/anmuxi ./recognition_demo_data_v1.0/gallery_product/gallery/ ``` -然后需要编辑记录了图像路径和标签信息的文本文件(`./dataset/product_demo_data_v1.0/data_file.txt`),这里基于原始标签文件,新建一个文件。命令如下。 +然后需要编辑记录了图像路径和标签信息的文本文件(`./recognition_demo_data_v1.0/gallery_product/data_file_update.txt`),这里基于原始标签文件,新建一个文件。命令如下。 ```shell # 复制文件 -cp dataset/product_demo_data_v1.0/data_file.txt dataset/product_demo_data_v1.0/data_file_update.txt +cp recognition_demo_data_v1.0/gallery_product/data_file.txt recognition_demo_data_v1.0/gallery_product/data_file_update.txt ``` -然后在文件`dataset/product_demo_data_v1.0/data_file_update.txt`中添加以下的信息, +然后在文件`recognition_demo_data_v1.0/gallery_product/data_file_update.txt`中添加以下的信息, ``` -gallery/anmuxi/001.jpg 安慕希酸奶 -gallery/anmuxi/002.jpg 安慕希酸奶 -gallery/anmuxi/003.jpg 安慕希酸奶 -gallery/anmuxi/004.jpg 安慕希酸奶 -gallery/anmuxi/005.jpg 安慕希酸奶 -gallery/anmuxi/006.jpg 安慕希酸奶 +gallery/anmuxi/001.jpg 安慕希酸奶 +gallery/anmuxi/002.jpg 安慕希酸奶 +gallery/anmuxi/003.jpg 安慕希酸奶 +gallery/anmuxi/004.jpg 安慕希酸奶 +gallery/anmuxi/005.jpg 安慕希酸奶 +gallery/anmuxi/006.jpg 安慕希酸奶 ``` -每一行的文本中,第一个字段表示图像的相对路径,第二个字段表示图像对应的标签信息,中间用`空格符`分隔开。 +每一行的文本中,第一个字段表示图像的相对路径,第二个字段表示图像对应的标签信息,中间用`tab`键分隔开(注意:有些编辑器会将`tab`自动转换为`空格`,这种情况下会导致文件解析报错)。 @@ -239,10 +253,10 @@ gallery/anmuxi/006.jpg 安慕希酸奶 使用下面的命令构建index索引,加速识别后的检索过程。 ```shell -python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess.data_file="./dataset/product_demo_data_v1.0/data_file_update.txt" -o IndexProcess.index_path="./dataset/product_demo_data_v1.0/index_update" +python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess.data_file="./recognition_demo_data_v1.0/gallery_product/data_file_update.txt" -o IndexProcess.index_path="./recognition_demo_data_v1.0/gallery_product/index_update" ``` -最终新的索引信息保存在文件夹`./dataset/product_demo_data_v1.0/index_update`中。 +最终新的索引信息保存在文件夹`./recognition_demo_data_v1.0/gallery_product/index_update`中。 @@ -252,13 +266,17 @@ python3.7 python/build_gallery.py -c configs/build_product.yaml -o IndexProcess. 
```shell
# 使用下面的命令使用GPU进行预测，如果希望使用CPU预测，可以在命令后面添加-o Global.use_gpu=False
-python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./dataset/product_demo_data_v1.0/query/anmuxi.jpg" -o IndexProcess.index_path="./dataset/product_demo_data_v1.0/index_update"
+python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.infer_imgs="./recognition_demo_data_v1.0/test_product/anmuxi.jpg" -o IndexProcess.index_path="./recognition_demo_data_v1.0/gallery_product/index_update"
```

输出结果如下。

```
-[{'bbox': [243, 80, 523, 522], 'rec_docs': ['安慕希酸奶', '娃哈哈AD钙奶', '安慕希酸奶', '安慕希酸奶', '安慕希酸奶'], 'rec_scores': array([1214.9597168 , 548.33282471, 547.82104492, 535.13201904, 471.52706909])}]
+[{'bbox': [243, 80, 523, 522], 'rec_docs': '安慕希酸奶', 'rec_scores': 0.5570770502090454}]
```

-返回的5个结果中，有4个为`安慕希酸奶`，识别结果正确。
+最终识别结果为`安慕希酸奶`，识别正确，识别结果可视化如下所示。
+
+<div align="center">
+<img src="../../images/recognition/product_demo/result/anmuxi.jpg" width="400" />
+</div>
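
Finally, the gallery-building and query halves of this patch meet at the `Graph_Index` API in `deploy/vector_search/interface.py`. The round-trip sketch below uses only the calls visible in the diff (`build`, `load`, `search`); the import path assumes the script runs from the `deploy` directory, and the L2 normalization is an assumption inferred from the `IP` distance type and the demo scores staying below 1.0, neither guaranteed by this patch.

```python
import numpy as np
from vector_search.interface import Graph_Index  # assumed: run from deploy/

embedding_size = 512  # matches embedding_size in the build_*.yaml configs above
feats = np.random.rand(10, embedding_size).astype("float32")
feats /= np.linalg.norm(feats, axis=1, keepdims=True)  # assumed unit vectors for "IP"
docs = [f"label_{i}" for i in range(10)]

# Build an index on disk, then load and query it, as build_gallery.py and
# predict_system.py do respectively.
searcher = Graph_Index(dist_type="IP")
searcher.build(gallery_vectors=feats, gallery_docs=docs,
               pq_size=100, index_path="./index_demo")
searcher.load("./index_demo")
scores, matched = searcher.search(query=feats[3], return_k=5, search_budget=100)
print(matched[0], scores[0])  # expect label_3 with a score close to 1.0
```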