From db6bbbab6aa8ba212ddf46bd7094978e08215705 Mon Sep 17 00:00:00 2001 From: Kirill Sizov Date: Tue, 1 Nov 2022 13:05:48 +0200 Subject: [PATCH] Fix dataset import for Datumaro format (#4544) * Datumaro format: add load_data_callback * add test * fix test * fix project dataset uploading for some formats * Fix black * Update Changelog * Update README.md * Update README.md Co-authored-by: Maxim Zhiltsov * Fixes * Remove useless trailing backslashes * Fix tests * Fix test * Join tests * Small fix * Fix remark Co-authored-by: kirill.sizov Co-authored-by: yasakova-anastasia Co-authored-by: Maxim Zhiltsov --- CHANGELOG.md | 1 + README.md | 5 +- cvat/apps/dataset_manager/formats/datumaro.py | 8 +- cvat/apps/dataset_manager/formats/kitti.py | 4 +- cvat/apps/dataset_manager/formats/vggface2.py | 5 +- .../tests/assets/projects.json | 5 + .../dataset_manager/tests/assets/tasks.json | 30 ++++ .../tests/test_rest_api_formats.py | 138 ++++++++++++------ tests/python/rest_api/test_projects.py | 5 +- 9 files changed, 149 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7aec2a454..223e1ffed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -144,6 +144,7 @@ Skeleton (), () - Bug: Exif orientation information handled incorrectly () - Fixed build of retinanet function image () +- Dataset import for Datumaro, KITTI and VGGFace2 formats () - Bug: Import dataset of Imagenet format fail () ## \[2.0.0] - 2022-03-04 diff --git a/README.md b/README.md index 87c6d9982..69f0d4a03 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ For more information about the supported formats, look at the | --------------------------------------------------------------------------------------------------------- | ------ | ------ | | [CVAT for images](https://opencv.github.io/cvat/docs/manual/advanced/xml_format/#annotation) | ✔️ | ✔️ | | [CVAT for a video](https://opencv.github.io/cvat/docs/manual/advanced/xml_format/#interpolation) | ✔️ | ✔️ | -| [Datumaro](https://github.com/cvat-ai/datumaro) | | ✔️ | +| [Datumaro](https://github.com/cvat-ai/datumaro) | ✔️ | ✔️ | | [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | ✔️ | ✔️ | | Segmentation masks from [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | ✔️ | ✔️ | | [YOLO](https://pjreddie.com/darknet/yolo/) | ✔️ | ✔️ | @@ -140,6 +140,7 @@ For more information about the supported formats, look at the | [MS COCO Keypoints Detection](http://cocodataset.org/#format-data) | ✔️ | ✔️ | | [TFrecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) | ✔️ | ✔️ | | [MOT](https://motchallenge.net/) | ✔️ | ✔️ | +| [MOTS PNG](https://www.vision.rwth-aachen.de/page/mots) | ✔️ | ✔️ | | [LabelMe 3.0](http://labelme.csail.mit.edu/Release3.0) | ✔️ | ✔️ | | [ImageNet](http://www.image-net.org) | ✔️ | ✔️ | | [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) | ✔️ | ✔️ | @@ -150,7 +151,9 @@ For more information about the supported formats, look at the | [Open Images V6](https://storage.googleapis.com/openimages/web/index.html) | ✔️ | ✔️ | | [Cityscapes](https://www.cityscapes-dataset.com/login/) | ✔️ | ✔️ | | [KITTI](http://www.cvlibs.net/datasets/kitti/) | ✔️ | ✔️ | +| [Kitti Raw Format](https://www.cvlibs.net/datasets/kitti/raw_data.php) | ✔️ | ✔️ | | [LFW](http://vis-www.cs.umass.edu/lfw/) | ✔️ | ✔️ | +| [Supervisely Point Cloud Format](https://docs.supervise.ly/data-organization/00_ann_format_navi) | ✔️ | ✔️ | diff --git a/cvat/apps/dataset_manager/formats/datumaro.py b/cvat/apps/dataset_manager/formats/datumaro.py index c7b08215f..b90bc5bea 100644 --- a/cvat/apps/dataset_manager/formats/datumaro.py +++ b/cvat/apps/dataset_manager/formats/datumaro.py @@ -36,12 +36,14 @@ def _export(dst_file, instance_data, save_images=False): make_zip_archive(tmp_dir, dst_file) @importer(name="Datumaro", ext="ZIP", version="1.0") -def _import(src_file, instance_data): +def _import(src_file, instance_data, load_data_callback=None): with TemporaryDirectory() as tmp_dir: Archive(src_file.name).extractall(tmp_dir) dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) @exporter(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D) @@ -58,10 +60,12 @@ def _export(dst_file, instance_data, save_images=False): make_zip_archive(tmp_dir, dst_file) @importer(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D) -def _import(src_file, instance_data): +def _import(src_file, instance_data, load_data_callback=None): with TemporaryDirectory() as tmp_dir: Archive(src_file.name).extractall(tmp_dir) dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env) + if load_data_callback is not None: + load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/kitti.py b/cvat/apps/dataset_manager/formats/kitti.py index d3296b8a7..a380d76ad 100644 --- a/cvat/apps/dataset_manager/formats/kitti.py +++ b/cvat/apps/dataset_manager/formats/kitti.py @@ -35,7 +35,7 @@ def _export(dst_file, instance_data, save_images=False): make_zip_archive(tmp_dir, dst_file) @importer(name='KITTI', ext='ZIP', version='1.0') -def _import(src_file, instance_data): +def _import(src_file, instance_data, load_data_callback=None): with TemporaryDirectory() as tmp_dir: Archive(src_file.name).extractall(tmp_dir) @@ -51,4 +51,6 @@ def _import(src_file, instance_data): filter_annotations=True) dataset.transform('masks_to_polygons') + if load_data_callback is not None: + load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/vggface2.py b/cvat/apps/dataset_manager/formats/vggface2.py index e5f3eca1a..b01797994 100644 --- a/cvat/apps/dataset_manager/formats/vggface2.py +++ b/cvat/apps/dataset_manager/formats/vggface2.py @@ -8,7 +8,7 @@ from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset -from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, \ +from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, TaskData, \ import_dm_annotations from cvat.apps.dataset_manager.util import make_zip_archive @@ -30,7 +30,8 @@ def _import(src_file, instance_data, load_data_callback=None): zipfile.ZipFile(src_file).extractall(tmp_dir) dataset = Dataset.import_from(tmp_dir, 'vgg_face2', env=dm_env) - dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|") + if isinstance(instance_data, TaskData): + dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|") if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/tests/assets/projects.json b/cvat/apps/dataset_manager/tests/assets/projects.json index c62084678..9b4d9de54 100644 --- a/cvat/apps/dataset_manager/tests/assets/projects.json +++ b/cvat/apps/dataset_manager/tests/assets/projects.json @@ -2,6 +2,11 @@ "main": { "name": "Main project", "labels": [ + { + "name": "background", + "color": "#5c5eba", + "attributes": [] + }, { "name": "car", "color": "#2080c0", diff --git a/cvat/apps/dataset_manager/tests/assets/tasks.json b/cvat/apps/dataset_manager/tests/assets/tasks.json index e5c4e140e..1673b0249 100644 --- a/cvat/apps/dataset_manager/tests/assets/tasks.json +++ b/cvat/apps/dataset_manager/tests/assets/tasks.json @@ -184,6 +184,36 @@ } ] }, + "KITTI 1.0": { + "name": "kitti task", + "overlap": 0, + "segment_size": 100, + "labels": [ + { + "name": "car", + "color": "#2080c0", + "attributes": [ + { + "name": "is_crowd", + "mutable": false, + "input_type": "checkbox", + "default_value": "false", + "values": ["false", "true"] + } + ] + }, + { + "name": "person", + "color": "#c06060", + "attributes": [] + }, + { + "name": "background", + "color": "#000000", + "attributes": [] + } + ] + }, "wrong_checkbox_value": { "name": "wrong checkbox value task", "overlap": 0, diff --git a/cvat/apps/dataset_manager/tests/test_rest_api_formats.py b/cvat/apps/dataset_manager/tests/test_rest_api_formats.py index 1f9c1bf89..93327305a 100644 --- a/cvat/apps/dataset_manager/tests/test_rest_api_formats.py +++ b/cvat/apps/dataset_manager/tests/test_rest_api_formats.py @@ -199,6 +199,11 @@ class _DbTestBase(APITestCase): response = self.client.put(path, data) return response + def _post_request_with_data(self, path, data, user): + with ForceLogin(user, self.client): + response = self.client.post(path, data) + return response + def _delete_request(self, path, user): with ForceLogin(user, self.client): response = self.client.delete(path) @@ -349,6 +354,9 @@ class _DbTestBase(APITestCase): def _generate_url_dump_project_dataset(self, project_id, format_name): return f"/api/projects/{project_id}/dataset?format={format_name}" + def _generate_url_upload_project_dataset(self, project_id, format_name): + return f"/api/projects/{project_id}/dataset?format={format_name}" + def _remove_annotations(self, url, user): response = self._delete_request(url, user) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) @@ -436,7 +444,6 @@ class TaskDumpUploadTest(_DbTestBase): if upload_format_name == "CVAT 1.1": file_zip_name = osp.join(test_dir, f'{test_name}_admin_CVAT for images 1.1.zip') else: - file_zip_name = osp.join(test_dir, f'{test_name}_admin_{upload_format_name}.zip') if not upload_format.ENABLED or not osp.exists(file_zip_name): continue @@ -925,21 +932,20 @@ class TaskDumpUploadTest(_DbTestBase): dump_formats = dm.views.get_export_formats() with TestDir() as test_dir: for dump_format in dump_formats: - if not dump_format.ENABLED: + if not dump_format.ENABLED or dump_format.DIMENSION == dm.bindings.DimensionType.DIM_3D: continue dump_format_name = dump_format.DISPLAY_NAME + with self.subTest(format=dump_format_name): if dump_format_name in [ "MOTS PNG 1.0", # issue #2925 and changed points values - 'Kitti Raw Format 1.0', - 'Sly Point Cloud Format 1.0', - 'Datumaro 3D 1.0', "Cityscapes 1.0" # expanding annotations due to background mask ]: self.skipTest("Format is fail") + images = self._generate_task_images(3) if dump_format_name in [ - "Market-1501 1.0", "Cityscapes 1.0", \ + "Market-1501 1.0", "ICDAR Localization 1.0", "ICDAR Recognition 1.0", \ "ICDAR Segmentation 1.0", "COCO Keypoints 1.0", ]: @@ -947,11 +953,11 @@ class TaskDumpUploadTest(_DbTestBase): else: task = self._create_task(tasks["main"], images) task_id = task["id"] + if dump_format_name in [ - "MOT 1.1", "MOTS PNG 1.0", - "PASCAL VOC 1.1", "Segmentation mask 1.1", + "MOT 1.1", "PASCAL VOC 1.1", "Segmentation mask 1.1", "TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0", - "WiderFace 1.0", "VGGFace2 1.0", "Cityscapes 1.0", + "WiderFace 1.0", "VGGFace2 1.0", "Datumaro 1.0", "Open Images V6 1.0", "KITTI 1.0" ]: self._create_annotations(task, dump_format_name, "default") @@ -981,6 +987,7 @@ class TaskDumpUploadTest(_DbTestBase): with open(file_zip_name, 'rb') as binary_file: self._upload_file(url, binary_file, self.admin) + task_ann = TaskAnnotation(task_id) task_ann.init_from_db() task_ann_data = task_ann.data @@ -1209,10 +1216,11 @@ class TaskDumpUploadTest(_DbTestBase): data_from_task_after_upload = self._get_data_from_task(task_id, include_images) compare_datasets(self, data_from_task_before_upload, data_from_task_after_upload) -class ProjectDump(_DbTestBase): - def test_api_v2_export_dataset(self): +class ProjectDumpUpload(_DbTestBase): + def test_api_v2_export_import_dataset(self): test_name = self._testMethodName dump_formats = dm.views.get_export_formats() + upload_formats = dm.views.get_import_formats() expected = { self.admin: {'name': 'admin', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, @@ -1228,41 +1236,83 @@ class ProjectDump(_DbTestBase): if not dump_format.ENABLED or dump_format.DIMENSION == dm.bindings.DimensionType.DIM_3D: continue dump_format_name = dump_format.DISPLAY_NAME - with self.subTest(format=dump_format_name): - project = self._create_project(projects['main']) - pid = project['id'] - images = self._generate_task_images(3) - tasks['task in project #1']['project_id'] = pid - self._create_task(tasks['task in project #1'], images) - images = self._generate_task_images(3, 3) - tasks['task in project #2']['project_id'] = pid - self._create_task(tasks['task in project #2'], images) - url = self._generate_url_dump_project_dataset(project['id'], dump_format_name) + if dump_format_name in [ + 'Cityscapes 1.0', 'LFW 1.0', 'Market-1501 1.0', + 'MOT 1.1', 'TFRecord 1.0' + ]: + # TO-DO: fix bug for this formats + continue + project = copy.deepcopy(projects['main']) + if dump_format_name in tasks: + project['labels'] = tasks[dump_format_name]['labels'] + project = self._create_project(project) + tasks['task in project #1']['project_id'] = project['id'] + task = self._create_task(tasks['task in project #1'], self._generate_task_images(3)) + + url = self._generate_url_dump_project_dataset(project['id'], dump_format_name) + + if dump_format_name in [ + "Cityscapes 1.0", "Datumaro 1.0", "ImageNet 1.0", + "MOT 1.1", "MOTS PNG 1.0", "PASCAL VOC 1.1", + "Segmentation mask 1.1", "TFRecord 1.0", "VGGFace2 1.0", + "WiderFace 1.0", "YOLO 1.1" + ]: + self._create_annotations(task, dump_format_name, "default") + else: + self._create_annotations(task, dump_format_name, "random") - for user, edata in list(expected.items()): - user_name = edata['name'] - file_zip_name = osp.join(test_dir, f'{test_name}_{user_name}_{dump_format_name}.zip') - data = { - "format": dump_format_name, - } - response = self._get_request_with_data(url, data, user) - self.assertEqual(response.status_code, edata["accept code"]) - response = self._get_request_with_data(url, data, user) - self.assertEqual(response.status_code, edata["create code"]) - data = { - "format": dump_format_name, - "action": "download", - } - response = self._get_request_with_data(url, data, user) - self.assertEqual(response.status_code, edata["code"]) - if response.status_code == status.HTTP_200_OK: - content = BytesIO(b"".join(response.streaming_content)) - with open(file_zip_name, "wb") as f: - f.write(content.getvalue()) - self.assertEqual(response.status_code, edata['code']) - self.assertEqual(osp.exists(file_zip_name), edata['file_exists']) + for user, edata in list(expected.items()): + user_name = edata['name'] + file_zip_name = osp.join(test_dir, f'{test_name}_{user_name}_{dump_format_name}.zip') + data = { + "format": dump_format_name, + } + + response = self._get_request_with_data(url, data, user) + self.assertEqual(response.status_code, edata["accept code"]) + + response = self._get_request_with_data(url, data, user) + self.assertEqual(response.status_code, edata["create code"]) + + data = { + "format": dump_format_name, + "action": "download", + } + response = self._get_request_with_data(url, data, user) + self.assertEqual(response.status_code, edata["code"]) + + if response.status_code == status.HTTP_200_OK: + content = BytesIO(b"".join(response.streaming_content)) + with open(file_zip_name, "wb") as f: + f.write(content.getvalue()) + + self.assertEqual(response.status_code, edata['code']) + self.assertEqual(osp.exists(file_zip_name), edata['file_exists']) + + for upload_format in upload_formats: + if not upload_format.ENABLED or upload_format.DIMENSION == dm.bindings.DimensionType.DIM_3D: + continue + upload_format_name = upload_format.DISPLAY_NAME + if upload_format_name in [ + 'Cityscapes 1.0', 'LFW 1.0', 'Market-1501 1.0', + 'MOT 1.1', 'TFRecord 1.0' + ]: + # TO-DO: fix bug for this formats + continue + for user, edata in list(expected.items()): + project = copy.deepcopy(projects['main']) + if upload_format_name in tasks: + project['labels'] = tasks[upload_format_name]['labels'] + project = self._create_project(project) + file_zip_name = osp.join(test_dir, f"{test_name}_{edata['name']}_{upload_format_name}.zip") + url = self._generate_url_upload_project_dataset(project['id'], upload_format_name) + + if osp.exists(file_zip_name): + with open(file_zip_name, 'rb') as binary_file: + response = self._post_request_with_data(url, {"dataset_file": binary_file}, user) + self.assertEqual(response.status_code, edata['accept code']) - def test_api_v2_export_annotatios(self): + def test_api_v2_export_annotations(self): test_name = self._testMethodName dump_formats = dm.views.get_export_formats() diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index e97221eca..ff0a568d5 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -458,10 +458,11 @@ class TestImportExportDatasetProject: ) assert response.status == HTTPStatus.ACCEPTED - def test_can_import_export_dataset_with_imagenet_format(self): + @pytest.mark.parametrize("format_name", ("ImageNet 1.0", "Datumaro 1.0")) + def test_can_import_export_dataset_with_some_format(self, format_name): + # https://github.com/opencv/cvat/issues/4410 # https://github.com/opencv/cvat/issues/4850 username = "admin1" - format_name = "ImageNet 1.0" project_id = 4 response = self._test_export_project(username, project_id, format_name) -- GitLab