未验证 提交 db6bbbab 编写于 作者: K Kirill Sizov 提交者: GitHub

Fix dataset import for Datumaro format (#4544)

* Datumaro format: add load_data_callback

* add test

* fix test

* fix project dataset uploading for some formats

* Fix black

* Update Changelog

* Update README.md

* Update README.md
Co-authored-by: Maxim Zhiltsov <zhiltsov.max35@gmail.com>

* Fixes

* Remove useless trailing backslashes

* Fix tests

* Fix test

* Join tests

* Small fix

* Fix remark
Co-authored-by: kirill.sizov <kirill.sizov@intel.com>
Co-authored-by: yasakova-anastasia <yasakova_anastasiya@mail.ru>
Co-authored-by: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
上级 60a05c7c
......@@ -144,6 +144,7 @@ Skeleton (<https://github.com/cvat-ai/cvat/pull/1>), (<https://github.com/opencv
- Fix build dependencies for Siammask (<https://github.com/openvinotoolkit/cvat/pull/4486>)
- Bug: Exif orientation information handled incorrectly (<https://github.com/openvinotoolkit/cvat/pull/4529>)
- Fixed build of retinanet function image (<https://github.com/cvat-ai/cvat/pull/54>)
- Dataset import for Datumaro, KITTI and VGGFace2 formats (<https://github.com/opencv/cvat/pull/4544>)
- Bug: Importing a dataset in ImageNet format fails (<https://github.com/opencv/cvat/issues/4850>)
## \[2.0.0] - 2022-03-04
......
......@@ -132,7 +132,7 @@ For more information about the supported formats, look at the
| --------------------------------------------------------------------------------------------------------- | ------ | ------ |
| [CVAT for images](https://opencv.github.io/cvat/docs/manual/advanced/xml_format/#annotation) | ✔️ | ✔️ |
| [CVAT for a video](https://opencv.github.io/cvat/docs/manual/advanced/xml_format/#interpolation) | ✔️ | ✔️ |
| [Datumaro](https://github.com/cvat-ai/datumaro) | | ✔️ |
| [Datumaro](https://github.com/cvat-ai/datumaro) | ✔️ | ✔️ |
| [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | ✔️ | ✔️ |
| Segmentation masks from [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | ✔️ | ✔️ |
| [YOLO](https://pjreddie.com/darknet/yolo/) | ✔️ | ✔️ |
......@@ -140,6 +140,7 @@ For more information about the supported formats, look at the
| [MS COCO Keypoints Detection](http://cocodataset.org/#format-data) | ✔️ | ✔️ |
| [TFrecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) | ✔️ | ✔️ |
| [MOT](https://motchallenge.net/) | ✔️ | ✔️ |
| [MOTS PNG](https://www.vision.rwth-aachen.de/page/mots) | ✔️ | ✔️ |
| [LabelMe 3.0](http://labelme.csail.mit.edu/Release3.0) | ✔️ | ✔️ |
| [ImageNet](http://www.image-net.org) | ✔️ | ✔️ |
| [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) | ✔️ | ✔️ |
......@@ -150,7 +151,9 @@ For more information about the supported formats, look at the
| [Open Images V6](https://storage.googleapis.com/openimages/web/index.html) | ✔️ | ✔️ |
| [Cityscapes](https://www.cityscapes-dataset.com/login/) | ✔️ | ✔️ |
| [KITTI](http://www.cvlibs.net/datasets/kitti/) | ✔️ | ✔️ |
| [Kitti Raw Format](https://www.cvlibs.net/datasets/kitti/raw_data.php) | ✔️ | ✔️ |
| [LFW](http://vis-www.cs.umass.edu/lfw/) | ✔️ | ✔️ |
| [Supervisely Point Cloud Format](https://docs.supervise.ly/data-organization/00_ann_format_navi) | ✔️ | ✔️ |
<!--lint enable maximum-line-length-->
......
......@@ -36,12 +36,14 @@ def _export(dst_file, instance_data, save_images=False):
make_zip_archive(tmp_dir, dst_file)
@importer(name="Datumaro", ext="ZIP", version="1.0")
def _import(src_file, instance_data):
def _import(src_file, instance_data, load_data_callback=None):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
@exporter(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D)
......@@ -58,10 +60,12 @@ def _export(dst_file, instance_data, save_images=False):
make_zip_archive(tmp_dir, dst_file)
@importer(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D)
def _import(src_file, instance_data):
def _import(src_file, instance_data, load_data_callback=None):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
......@@ -35,7 +35,7 @@ def _export(dst_file, instance_data, save_images=False):
make_zip_archive(tmp_dir, dst_file)
@importer(name='KITTI', ext='ZIP', version='1.0')
def _import(src_file, instance_data):
def _import(src_file, instance_data, load_data_callback=None):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)
......@@ -51,4 +51,6 @@ def _import(src_file, instance_data):
filter_annotations=True)
dataset.transform('masks_to_polygons')
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
......@@ -8,7 +8,7 @@ from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, \
from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, TaskData, \
import_dm_annotations
from cvat.apps.dataset_manager.util import make_zip_archive
......@@ -30,7 +30,8 @@ def _import(src_file, instance_data, load_data_callback=None):
zipfile.ZipFile(src_file).extractall(tmp_dir)
dataset = Dataset.import_from(tmp_dir, 'vgg_face2', env=dm_env)
dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|")
if isinstance(instance_data, TaskData):
dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|")
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
......@@ -2,6 +2,11 @@
"main": {
"name": "Main project",
"labels": [
{
"name": "background",
"color": "#5c5eba",
"attributes": []
},
{
"name": "car",
"color": "#2080c0",
......
......@@ -184,6 +184,36 @@
}
]
},
"KITTI 1.0": {
"name": "kitti task",
"overlap": 0,
"segment_size": 100,
"labels": [
{
"name": "car",
"color": "#2080c0",
"attributes": [
{
"name": "is_crowd",
"mutable": false,
"input_type": "checkbox",
"default_value": "false",
"values": ["false", "true"]
}
]
},
{
"name": "person",
"color": "#c06060",
"attributes": []
},
{
"name": "background",
"color": "#000000",
"attributes": []
}
]
},
"wrong_checkbox_value": {
"name": "wrong checkbox value task",
"overlap": 0,
......
......@@ -199,6 +199,11 @@ class _DbTestBase(APITestCase):
response = self.client.put(path, data)
return response
def _post_request_with_data(self, path, data, user):
    """Send a POST request with a payload on behalf of ``user``.

    The test client is logged in as ``user`` only for the duration of the
    request (via ``ForceLogin``); the response object is returned unchanged.
    """
    with ForceLogin(user, self.client):
        return self.client.post(path, data)
def _delete_request(self, path, user):
with ForceLogin(user, self.client):
response = self.client.delete(path)
......@@ -349,6 +354,9 @@ class _DbTestBase(APITestCase):
def _generate_url_dump_project_dataset(self, project_id, format_name):
return f"/api/projects/{project_id}/dataset?format={format_name}"
def _generate_url_upload_project_dataset(self, project_id, format_name):
return f"/api/projects/{project_id}/dataset?format={format_name}"
def _remove_annotations(self, url, user):
response = self._delete_request(url, user)
self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
......@@ -436,7 +444,6 @@ class TaskDumpUploadTest(_DbTestBase):
if upload_format_name == "CVAT 1.1":
file_zip_name = osp.join(test_dir, f'{test_name}_admin_CVAT for images 1.1.zip')
else:
file_zip_name = osp.join(test_dir, f'{test_name}_admin_{upload_format_name}.zip')
if not upload_format.ENABLED or not osp.exists(file_zip_name):
continue
......@@ -925,21 +932,20 @@ class TaskDumpUploadTest(_DbTestBase):
dump_formats = dm.views.get_export_formats()
with TestDir() as test_dir:
for dump_format in dump_formats:
if not dump_format.ENABLED:
if not dump_format.ENABLED or dump_format.DIMENSION == dm.bindings.DimensionType.DIM_3D:
continue
dump_format_name = dump_format.DISPLAY_NAME
with self.subTest(format=dump_format_name):
if dump_format_name in [
"MOTS PNG 1.0", # issue #2925 and changed points values
'Kitti Raw Format 1.0',
'Sly Point Cloud Format 1.0',
'Datumaro 3D 1.0',
"Cityscapes 1.0" # expanding annotations due to background mask
]:
self.skipTest("Format is fail")
images = self._generate_task_images(3)
if dump_format_name in [
"Market-1501 1.0", "Cityscapes 1.0", \
"Market-1501 1.0",
"ICDAR Localization 1.0", "ICDAR Recognition 1.0", \
"ICDAR Segmentation 1.0", "COCO Keypoints 1.0",
]:
......@@ -947,11 +953,11 @@ class TaskDumpUploadTest(_DbTestBase):
else:
task = self._create_task(tasks["main"], images)
task_id = task["id"]
if dump_format_name in [
"MOT 1.1", "MOTS PNG 1.0",
"PASCAL VOC 1.1", "Segmentation mask 1.1",
"MOT 1.1", "PASCAL VOC 1.1", "Segmentation mask 1.1",
"TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0",
"WiderFace 1.0", "VGGFace2 1.0", "Cityscapes 1.0",
"WiderFace 1.0", "VGGFace2 1.0",
"Datumaro 1.0", "Open Images V6 1.0", "KITTI 1.0"
]:
self._create_annotations(task, dump_format_name, "default")
......@@ -981,6 +987,7 @@ class TaskDumpUploadTest(_DbTestBase):
with open(file_zip_name, 'rb') as binary_file:
self._upload_file(url, binary_file, self.admin)
task_ann = TaskAnnotation(task_id)
task_ann.init_from_db()
task_ann_data = task_ann.data
......@@ -1209,10 +1216,11 @@ class TaskDumpUploadTest(_DbTestBase):
data_from_task_after_upload = self._get_data_from_task(task_id, include_images)
compare_datasets(self, data_from_task_before_upload, data_from_task_after_upload)
class ProjectDump(_DbTestBase):
def test_api_v2_export_dataset(self):
class ProjectDumpUpload(_DbTestBase):
def test_api_v2_export_import_dataset(self):
test_name = self._testMethodName
dump_formats = dm.views.get_export_formats()
upload_formats = dm.views.get_import_formats()
expected = {
self.admin: {'name': 'admin', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED,
......@@ -1228,41 +1236,83 @@ class ProjectDump(_DbTestBase):
if not dump_format.ENABLED or dump_format.DIMENSION == dm.bindings.DimensionType.DIM_3D:
continue
dump_format_name = dump_format.DISPLAY_NAME
with self.subTest(format=dump_format_name):
project = self._create_project(projects['main'])
pid = project['id']
images = self._generate_task_images(3)
tasks['task in project #1']['project_id'] = pid
self._create_task(tasks['task in project #1'], images)
images = self._generate_task_images(3, 3)
tasks['task in project #2']['project_id'] = pid
self._create_task(tasks['task in project #2'], images)
url = self._generate_url_dump_project_dataset(project['id'], dump_format_name)
if dump_format_name in [
'Cityscapes 1.0', 'LFW 1.0', 'Market-1501 1.0',
'MOT 1.1', 'TFRecord 1.0'
]:
# TODO: fix the bug for these formats
continue
project = copy.deepcopy(projects['main'])
if dump_format_name in tasks:
project['labels'] = tasks[dump_format_name]['labels']
project = self._create_project(project)
tasks['task in project #1']['project_id'] = project['id']
task = self._create_task(tasks['task in project #1'], self._generate_task_images(3))
url = self._generate_url_dump_project_dataset(project['id'], dump_format_name)
if dump_format_name in [
"Cityscapes 1.0", "Datumaro 1.0", "ImageNet 1.0",
"MOT 1.1", "MOTS PNG 1.0", "PASCAL VOC 1.1",
"Segmentation mask 1.1", "TFRecord 1.0", "VGGFace2 1.0",
"WiderFace 1.0", "YOLO 1.1"
]:
self._create_annotations(task, dump_format_name, "default")
else:
self._create_annotations(task, dump_format_name, "random")
for user, edata in list(expected.items()):
user_name = edata['name']
file_zip_name = osp.join(test_dir, f'{test_name}_{user_name}_{dump_format_name}.zip')
data = {
"format": dump_format_name,
}
response = self._get_request_with_data(url, data, user)
self.assertEqual(response.status_code, edata["accept code"])
response = self._get_request_with_data(url, data, user)
self.assertEqual(response.status_code, edata["create code"])
data = {
"format": dump_format_name,
"action": "download",
}
response = self._get_request_with_data(url, data, user)
self.assertEqual(response.status_code, edata["code"])
if response.status_code == status.HTTP_200_OK:
content = BytesIO(b"".join(response.streaming_content))
with open(file_zip_name, "wb") as f:
f.write(content.getvalue())
self.assertEqual(response.status_code, edata['code'])
self.assertEqual(osp.exists(file_zip_name), edata['file_exists'])
for user, edata in list(expected.items()):
user_name = edata['name']
file_zip_name = osp.join(test_dir, f'{test_name}_{user_name}_{dump_format_name}.zip')
data = {
"format": dump_format_name,
}
response = self._get_request_with_data(url, data, user)
self.assertEqual(response.status_code, edata["accept code"])
response = self._get_request_with_data(url, data, user)
self.assertEqual(response.status_code, edata["create code"])
data = {
"format": dump_format_name,
"action": "download",
}
response = self._get_request_with_data(url, data, user)
self.assertEqual(response.status_code, edata["code"])
if response.status_code == status.HTTP_200_OK:
content = BytesIO(b"".join(response.streaming_content))
with open(file_zip_name, "wb") as f:
f.write(content.getvalue())
self.assertEqual(response.status_code, edata['code'])
self.assertEqual(osp.exists(file_zip_name), edata['file_exists'])
for upload_format in upload_formats:
if not upload_format.ENABLED or upload_format.DIMENSION == dm.bindings.DimensionType.DIM_3D:
continue
upload_format_name = upload_format.DISPLAY_NAME
if upload_format_name in [
'Cityscapes 1.0', 'LFW 1.0', 'Market-1501 1.0',
'MOT 1.1', 'TFRecord 1.0'
]:
# TODO: fix the bug for these formats
continue
for user, edata in list(expected.items()):
project = copy.deepcopy(projects['main'])
if upload_format_name in tasks:
project['labels'] = tasks[upload_format_name]['labels']
project = self._create_project(project)
file_zip_name = osp.join(test_dir, f"{test_name}_{edata['name']}_{upload_format_name}.zip")
url = self._generate_url_upload_project_dataset(project['id'], upload_format_name)
if osp.exists(file_zip_name):
with open(file_zip_name, 'rb') as binary_file:
response = self._post_request_with_data(url, {"dataset_file": binary_file}, user)
self.assertEqual(response.status_code, edata['accept code'])
def test_api_v2_export_annotatios(self):
def test_api_v2_export_annotations(self):
test_name = self._testMethodName
dump_formats = dm.views.get_export_formats()
......
......@@ -458,10 +458,11 @@ class TestImportExportDatasetProject:
)
assert response.status == HTTPStatus.ACCEPTED
def test_can_import_export_dataset_with_imagenet_format(self):
@pytest.mark.parametrize("format_name", ("ImageNet 1.0", "Datumaro 1.0"))
def test_can_import_export_dataset_with_some_format(self, format_name):
# https://github.com/opencv/cvat/issues/4410
# https://github.com/opencv/cvat/issues/4850
username = "admin1"
format_name = "ImageNet 1.0"
project_id = 4
response = self._test_export_project(username, project_id, format_name)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册