Unverified commit 15c3a63b authored by Maria Khrustaleva, committed by GitHub

Fix export job dataset (#5052)

Parent c37a5c00
@@ -47,6 +47,7 @@ non-ascii paths while adding files from "Connected file share" (issue #4428)
(<https://github.com/opencv/cvat/pull/5057>)
- Double modal export/backup a task/project (<https://github.com/opencv/cvat/pull/5075>)
- Fixed bug of computing Job's unsolved/resolved issues numbers (<https://github.com/opencv/cvat/pull/5101>)
- Dataset export for job (<https://github.com/opencv/cvat/pull/5052>)
### Security
- TBD
@@ -21,7 +21,7 @@ from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, Extractor,
from datumaro.util.image import Image
from defusedxml import ElementTree
from cvat.apps.dataset_manager.bindings import (ProjectData, TaskData,
from cvat.apps.dataset_manager.bindings import (ProjectData, CommonData,
get_defaulted_subset,
import_dm_annotations,
match_dm_item)
@@ -984,11 +984,11 @@ def dump_as_cvat_interpolation(dumper, annotations):
counter += 1
for shape in annotations.shapes:
frame_step = annotations.frame_step if isinstance(annotations, TaskData) else annotations.frame_step[shape.task_id]
if isinstance(annotations, TaskData):
stop_frame = int(annotations.meta['task']['stop_frame'])
frame_step = annotations.frame_step if not isinstance(annotations, ProjectData) else annotations.frame_step[shape.task_id]
if not isinstance(annotations, ProjectData):
stop_frame = int(annotations.meta[annotations.META_FIELD]['stop_frame'])
else:
task_meta = list(filter(lambda task: int(task[1]['id']) == shape.task_id, annotations.meta['project']['tasks']))[0][1]
task_meta = list(filter(lambda task: int(task[1]['id']) == shape.task_id, annotations.meta[annotations.META_FIELD]['tasks']))[0][1]
stop_frame = int(task_meta['stop_frame'])
track = {
'label': shape.label,
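The hunk above generalizes dump_as_cvat_interpolation from tasks to tasks and jobs: the isinstance(annotations, TaskData) checks become not isinstance(annotations, ProjectData), and the hard-coded 'task' meta key becomes annotations.META_FIELD. Below is a minimal sketch with simplified stand-in classes (the real definitions live in cvat/apps/dataset_manager/bindings.py and are only assumed here) of how that lookup serves all three instance types:

    # Stand-in classes, not the CVAT originals; only the META_FIELD idea is shown.
    class ProjectData:
        META_FIELD = 'project'
        def __init__(self, meta):
            self.meta = meta

    class CommonData:  # assumed shared base for task- and job-level data
        META_FIELD = None
        def __init__(self, meta):
            self.meta = meta

    class TaskData(CommonData):
        META_FIELD = 'task'

    class JobData(CommonData):
        META_FIELD = 'job'

    def get_stop_frame(annotations, task_id=None):
        # Mirrors the updated dumper logic: any non-project instance (task or job)
        # keeps its own stop_frame under meta[META_FIELD].
        if not isinstance(annotations, ProjectData):
            return int(annotations.meta[annotations.META_FIELD]['stop_frame'])
        # A project stores per-task meta as (name, fields) pairs under 'tasks'.
        task_meta = next(fields for _, fields in annotations.meta[annotations.META_FIELD]['tasks']
                         if int(fields['id']) == task_id)
        return int(task_meta['stop_frame'])

    # The same function serves a job and a project:
    assert get_stop_frame(JobData({'job': {'stop_frame': '41'}})) == 41
    assert get_stop_frame(
        ProjectData({'project': {'tasks': [('t1', {'id': '7', 'stop_frame': '99'})]}}),
        task_id=7) == 99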
@@ -1102,7 +1102,7 @@ def load_anno(file_object, annotations):
attributes={'frame': el.attrib['id']},
image=el.attrib['name']
),
task_data=annotations
instance_data=annotations
))
elif el.tag in supported_shapes and (track is not None or image_is_opened):
if shape and shape['type'] == 'skeleton':
@@ -1258,10 +1258,10 @@ def load_anno(file_object, annotations):
tag = None
el.clear()
def dump_task_anno(dst_file, task_data, callback):
def dump_task_or_job_anno(dst_file, instance_data, callback):
dumper = create_xml_dumper(dst_file)
dumper.open_document()
callback(dumper, task_data)
callback(dumper, instance_data)
dumper.close_document()
def dump_project_anno(dst_file: BufferedWriter, project_data: ProjectData, callback: Callable):
@@ -1270,33 +1270,34 @@ def dump_project_anno(dst_file: BufferedWriter, project_data: ProjectData, callb
callback(dumper, project_data)
dumper.close_document()
def dump_media_files(task_data: TaskData, img_dir: str, project_data: ProjectData = None):
def dump_media_files(instance_data: CommonData, img_dir: str, project_data: ProjectData = None):
ext = ''
if task_data.meta['task']['mode'] == 'interpolation':
if instance_data.meta[instance_data.META_FIELD]['mode'] == 'interpolation':
ext = FrameProvider.VIDEO_FRAME_EXT
frame_provider = FrameProvider(task_data.db_task.data)
frame_provider = FrameProvider(instance_data.db_data)
frames = frame_provider.get_frames(
instance_data.start, instance_data.stop,
frame_provider.Quality.ORIGINAL,
frame_provider.Type.BUFFER)
for frame_id, (frame_data, _) in enumerate(frames):
if (project_data is not None and (task_data.db_task.id, frame_id) in project_data.deleted_frames) \
or frame_id in task_data.deleted_frames:
for frame_id, (frame_data, _) in zip(instance_data.rel_range, frames):
if (project_data is not None and (instance_data.db_instance.id, frame_id) in project_data.deleted_frames) \
or frame_id in instance_data.deleted_frames:
continue
frame_name = task_data.frame_info[frame_id]['path'] if project_data is None \
else project_data.frame_info[(task_data.db_task.id, frame_id)]['path']
frame_name = instance_data.frame_info[frame_id]['path'] if project_data is None \
else project_data.frame_info[(instance_data.db_instance.id, frame_id)]['path']
img_path = osp.join(img_dir, frame_name + ext)
os.makedirs(osp.dirname(img_path), exist_ok=True)
with open(img_path, 'wb') as f:
f.write(frame_data.getvalue())
def _export_task(dst_file, task_data, anno_callback, save_images=False):
def _export_task_or_job(dst_file, instance_data, anno_callback, save_images=False):
with TemporaryDirectory() as temp_dir:
with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f:
dump_task_anno(f, task_data, anno_callback)
dump_task_or_job_anno(f, instance_data, anno_callback)
if save_images:
dump_media_files(task_data, osp.join(temp_dir, 'images'))
dump_media_files(instance_data, osp.join(temp_dir, 'images'))
make_zip_archive(temp_dir, dst_file)
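The rewritten dump_media_files no longer iterates every frame of the task: it asks the FrameProvider only for the instance's own [start, stop) range and pairs each frame with an id from instance_data.rel_range, so a job export writes just the frames of its segment. A toy walk-through with stand-in objects (the provider and data classes below are illustrative, and rel_range is assumed here to enumerate the instance's own 0-based frame ids):

    # Stub objects reproducing only the iteration pattern of dump_media_files.
    class StubFrameProvider:
        def get_frames(self, start_frame, stop_frame):
            # New-style signature: yield only the requested [start, stop) range.
            for idx in range(start_frame, stop_frame):
                yield (f'<bytes of frame {idx}>', None)

    class StubJobData:
        start, stop = 10, 15                      # the job covers task frames 10..14
        rel_range = range(stop - start)           # assumed: 0..4, keys into frame_info
        deleted_frames = {2}                      # relative id 2 (task frame 12) was deleted
        frame_info = {i: {'path': f'frame_{10 + i:06d}'} for i in range(5)}

    job = StubJobData()
    frames = StubFrameProvider().get_frames(job.start, job.stop)
    for frame_id, (frame_data, _) in zip(job.rel_range, frames):
        if frame_id in job.deleted_frames:
            continue                               # deleted frames are skipped, as above
        print(job.frame_info[frame_id]['path'])    # frame_000010 ... frame_000014, minus 000012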
@@ -1307,7 +1308,7 @@ def _export_project(dst_file: str, project_data: ProjectData, anno_callback: Cal
if save_images:
for task_data in project_data.task_data:
subset = get_defaulted_subset(task_data.db_task.subset, project_data.subsets)
subset = get_defaulted_subset(task_data.db_instance.subset, project_data.subsets)
subset_dir = osp.join(temp_dir, 'images', subset)
os.makedirs(subset_dir, exist_ok=True)
dump_media_files(task_data, subset_dir, project_data)
@@ -1320,7 +1321,7 @@ def _export_video(dst_file, instance_data, save_images=False):
_export_project(dst_file, instance_data,
anno_callback=dump_as_cvat_interpolation, save_images=save_images)
else:
_export_task(dst_file, instance_data,
_export_task_or_job(dst_file, instance_data,
anno_callback=dump_as_cvat_interpolation, save_images=save_images)
@exporter(name='CVAT for images', ext='ZIP', version='1.1')
@@ -1329,7 +1330,7 @@ def _export_images(dst_file, instance_data, save_images=False):
_export_project(dst_file, instance_data,
anno_callback=dump_as_cvat_annotation, save_images=save_images)
else:
_export_task(dst_file, instance_data,
_export_task_or_job(dst_file, instance_data,
anno_callback=dump_as_cvat_annotation, save_images=save_images)
@importer(name='CVAT', ext='XML, ZIP', version='1.1')
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
@@ -9,8 +10,7 @@ from datumaro.components.dataset import Dataset
from datumaro.plugins.kitti_format.format import KittiPath, write_label_map
from pyunpack import Archive
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
ProjectData, import_dm_annotations)
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive
from .transformations import RotatedBoxesToPolygons
@@ -45,8 +45,7 @@ def _import(src_file, instance_data):
write_label_map(color_map_path, color_map)
dataset = Dataset.import_from(tmp_dir, format='kitti', env=dm_env)
labels_meta = instance_data.meta['project']['labels'] \
if isinstance(instance_data, ProjectData) else instance_data.meta['task']['labels']
labels_meta = instance_data.meta[instance_data.META_FIELD]['labels']
if 'background' not in [label['name'] for _, label in labels_meta]:
dataset.filter('/item/annotation[label != "background"]',
filter_annotations=True)
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
@@ -13,13 +14,15 @@ from cvat.apps.dataset_manager.util import make_zip_archive
from .registry import dm_env, exporter, importer
def _import_task(dataset, task_data):
def _import_to_task(dataset, instance_data):
tracks = {}
label_cat = dataset.categories()[datumaro.AnnotationType.label]
for item in dataset:
frame_number = int(item.id) - 1 # NOTE: MOT frames start from 1
frame_number = task_data.abs_frame_id(frame_number)
# NOTE: MOT frames start from 1
# job has an offset, for task offset is 0
frame_number = int(item.id) - 1 + instance_data.start
frame_number = instance_data.abs_frame_id(frame_number)
for ann in item.annotations:
if ann.type != datumaro.AnnotationType.bbox:
@@ -28,7 +31,7 @@ def _import_task(dataset, task_data):
track_id = ann.attributes.get('track_id')
if track_id is None:
# Extension. Import regular boxes:
task_data.add_shape(task_data.LabeledShape(
instance_data.add_shape(instance_data.LabeledShape(
type='rectangle',
label=label_cat.items[ann.label].name,
points=ann.points,
@@ -41,7 +44,7 @@ def _import_task(dataset, task_data):
))
continue
shape = task_data.TrackedShape(
shape = instance_data.TrackedShape(
type='rectangle',
points=ann.points,
occluded=ann.attributes.get('occluded') is True,
@@ -55,7 +58,7 @@ def _import_task(dataset, task_data):
# build trajectories as lists of shapes in track dict
if track_id not in tracks:
tracks[track_id] = task_data.Track(
tracks[track_id] = instance_data.Track(
label_cat.items[ann.label].name, 0, 'manual', [])
tracks[track_id].shapes.append(shape)
@@ -67,10 +70,10 @@ def _import_task(dataset, task_data):
prev_shape_idx = 0
prev_shape = track.shapes[0]
for shape in track.shapes[1:]:
has_skip = task_data.frame_step < shape.frame - prev_shape.frame
has_skip = instance_data.frame_step < shape.frame - prev_shape.frame
if has_skip and not prev_shape.outside:
prev_shape = prev_shape._replace(outside=True,
frame=prev_shape.frame + task_data.frame_step)
frame=prev_shape.frame + instance_data.frame_step)
prev_shape_idx += 1
track.shapes.insert(prev_shape_idx, prev_shape)
prev_shape = shape
@@ -78,12 +81,12 @@ def _import_task(dataset, task_data):
# Append a shape with outside=True to finish the track
last_shape = track.shapes[-1]
if last_shape.frame + task_data.frame_step <= \
int(task_data.meta['task']['stop_frame']):
if last_shape.frame + instance_data.frame_step <= \
int(instance_data.meta[instance_data.META_FIELD]['stop_frame']):
track.shapes.append(last_shape._replace(outside=True,
frame=last_shape.frame + task_data.frame_step)
frame=last_shape.frame + instance_data.frame_step)
)
task_data.add_track(track)
instance_data.add_track(track)
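The added + instance_data.start is the core of the job fix in this importer: MOT item ids are 1-based within the uploaded sequence, and a job's sequence starts at the job's first task frame rather than at 0. A quick check with invented numbers:

    def mot_item_to_rel_frame(item_id: str, instance_start: int) -> int:
        # For a task the offset is 0, so behaviour is unchanged.
        return int(item_id) - 1 + instance_start

    assert mot_item_to_rel_frame("1", 0) == 0     # task import
    assert mot_item_to_rel_frame("1", 25) == 25   # job whose segment starts at frame 25
    assert mot_item_to_rel_frame("3", 25) == 27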
@exporter(name='MOT', ext='ZIP', version='1.1')
@@ -107,7 +110,7 @@ def _import(src_file, instance_data, load_data_callback=None):
# Dirty way to determine instance type to avoid circular dependency
if hasattr(instance_data, '_db_project'):
for sub_dataset, task_data in instance_data.split_dataset(dataset):
_import_task(sub_dataset, task_data)
_import_to_task(sub_dataset, task_data)
else:
_import_task(dataset, instance_data)
_import_to_task(dataset, instance_data)
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
@@ -22,16 +23,16 @@ class KeepTracks(ItemTransform):
return item.wrap(annotations=[a for a in item.annotations
if 'track_id' in a.attributes])
def _import_task(dataset, task_data):
def _import_to_task(dataset, instance_data):
tracks = {}
label_cat = dataset.categories()[AnnotationType.label]
root_hint = find_dataset_root(dataset, task_data)
root_hint = find_dataset_root(dataset, instance_data)
shift = 0
for item in dataset:
frame_number = task_data.abs_frame_id(
match_dm_item(item, task_data, root_hint=root_hint))
frame_number = instance_data.abs_frame_id(
match_dm_item(item, instance_data, root_hint=root_hint))
track_ids = set()
@@ -49,7 +50,7 @@ def _import_task(dataset, task_data):
else:
track_ids.add(track_id)
shape = task_data.TrackedShape(
shape = instance_data.TrackedShape(
type='polygon',
points=ann.points,
occluded=ann.attributes.get('occluded') is True,
@@ -64,7 +65,7 @@ def _import_task(dataset, task_data):
# build trajectories as lists of shapes in track dict
if track_id not in tracks:
tracks[track_id] = task_data.Track(
tracks[track_id] = instance_data.Track(
label_cat.items[ann.label].name, 0, 'manual', [])
tracks[track_id].shapes.append(shape)
@@ -75,10 +76,10 @@ def _import_task(dataset, task_data):
prev_shape_idx = 0
prev_shape = track.shapes[0]
for shape in track.shapes[1:]:
has_skip = task_data.frame_step < shape.frame - prev_shape.frame
has_skip = instance_data.frame_step < shape.frame - prev_shape.frame
if has_skip and not prev_shape.outside:
prev_shape = prev_shape._replace(outside=True,
frame=prev_shape.frame + task_data.frame_step)
frame=prev_shape.frame + instance_data.frame_step)
prev_shape_idx += 1
track.shapes.insert(prev_shape_idx, prev_shape)
prev_shape = shape
@@ -86,12 +87,12 @@ def _import_task(dataset, task_data):
# Append a shape with outside=True to finish the track
last_shape = track.shapes[-1]
if last_shape.frame + task_data.frame_step <= \
int(task_data.meta['task']['stop_frame']):
if last_shape.frame + instance_data.frame_step <= \
int(instance_data.meta[instance_data.META_FIELD]['stop_frame']):
track.shapes.append(last_shape._replace(outside=True,
frame=last_shape.frame + task_data.frame_step)
frame=last_shape.frame + instance_data.frame_step)
)
task_data.add_track(track)
instance_data.add_track(track)
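For context on the loop above (it is the same in the MOT and MOTS importers), here is a numeric illustration with invented values of how a skipped keyframe is closed with an outside shape so interpolation does not bridge the gap:

    # frame_step == 5 with keyframes on frames 0 and 15: the jump exceeds one step,
    # so an outside copy of the earlier shape is inserted at frame 0 + 5 == 5.
    frame_step = 5
    prev_frame, next_frame = 0, 15
    has_skip = frame_step < next_frame - prev_frame
    assert has_skip
    assert prev_frame + frame_step == 5   # frame of the inserted outside shape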
@exporter(name='MOTS PNG', ext='ZIP', version='1.0')
def _export(dst_file, instance_data, save_images=False):
@@ -120,7 +121,7 @@ def _import(src_file, instance_data, load_data_callback=None):
# Dirty way to determine instance type to avoid circular dependency
if hasattr(instance_data, '_db_project'):
for sub_dataset, task_data in instance_data.split_dataset(dataset):
_import_task(sub_dataset, task_data)
_import_to_task(sub_dataset, task_data)
else:
_import_task(dataset, instance_data)
_import_to_task(dataset, instance_data)
# Copyright (C) 2020-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
@@ -11,8 +12,7 @@ from tempfile import TemporaryDirectory
from datumaro.components.dataset import Dataset
from pyunpack import Archive
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
ProjectData, import_dm_annotations)
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive
from .registry import dm_env, exporter, importer
@@ -36,8 +36,7 @@ def _import(src_file, instance_data, load_data_callback=None):
# put label map from the task if not present
labelmap_file = osp.join(tmp_dir, 'labelmap.txt')
if not osp.isfile(labelmap_file):
labels_meta = instance_data.meta['project']['labels'] \
if isinstance(instance_data, ProjectData) else instance_data.meta['task']['labels']
labels_meta = instance_data.meta[instance_data.META_FIELD]['labels']
labels = (label['name'] + ':::' for _, label in labels_meta)
with open(labelmap_file, 'w') as f:
f.write('\n'.join(labels))
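For reference, a small illustration (labels invented) of the fallback labelmap.txt generated above when the uploaded archive does not include one; each line is just the label name followed by ':::', leaving the remaining fields empty:

    labels_meta = [(0, {'name': 'person'}), (1, {'name': 'car'})]
    labels = (label['name'] + ':::' for _, label in labels_meta)
    print('\n'.join(labels))
    # person:::
    # car:::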
@@ -49,8 +49,7 @@ def hex2rgb(color):
return tuple(int(color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
def make_colormap(instance_data):
instance_name = 'project' if 'project' in instance_data.meta.keys() else 'task'
labels = [label for _, label in instance_data.meta[instance_name]['labels']]
labels = [label for _, label in instance_data.meta[instance_data.META_FIELD]['labels']]
label_names = [label['name'] for label in labels]
if 'background' not in label_names:
@@ -16,7 +16,7 @@ from cvat.apps.engine.plugins import plugin_decorator
from cvat.apps.profiler import silk_profile
from .annotation import AnnotationIR, AnnotationManager
from .bindings import TaskData
from .bindings import TaskData, JobData
from .formats.registry import make_exporter, make_importer
from .util import bulk_create
@@ -553,24 +553,24 @@ class JobAnnotation:
return self.ir_data.data
def export(self, dst_file, exporter, host='', **options):
task_data = TaskData(
job_data = JobData(
annotation_ir=self.ir_data,
db_task=self.db_job.segment.task,
db_job=self.db_job,
host=host,
)
exporter(dst_file, task_data, **options)
exporter(dst_file, job_data, **options)
def import_annotations(self, src_file, importer):
task_data = TaskData(
job_data = JobData(
annotation_ir=AnnotationIR(),
db_task=self.db_job.segment.task,
db_job=self.db_job,
create_callback=self.create,
)
self.delete()
importer(src_file, task_data)
importer(src_file, job_data)
self.create(task_data.data.slice(self.start_frame, self.stop_frame).serialize())
self.create(job_data.data.slice(self.start_frame, self.stop_frame).serialize())
class TaskAnnotation:
def __init__(self, pk):
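With JobData wired into JobAnnotation.export(), exporting a single job's dataset follows the same path as a task export. A hypothetical driver, assuming the exporter registry key combines the name and version from the format decorators shown earlier (e.g. 'CVAT for images 1.1'); this helper is not part of the commit:

    from cvat.apps.dataset_manager.formats.registry import make_exporter
    from cvat.apps.dataset_manager.task import JobAnnotation

    def export_job_dataset(job_id: int, dst_path: str,
                           format_name: str = 'CVAT for images 1.1') -> None:
        # Hypothetical helper: load the job's annotations and hand them, together
        # with the job's images, to the requested format exporter.
        job_ann = JobAnnotation(job_id)
        job_ann.init_from_db()
        exporter = make_exporter(format_name)
        job_ann.export(dst_path, exporter, save_images=True)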
@@ -21,7 +21,7 @@ from rest_framework.test import APIClient, APITestCase
import cvat.apps.dataset_manager as dm
from cvat.apps.dataset_manager.annotation import AnnotationIR
from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
from cvat.apps.dataset_manager.bindings import (CvatTaskOrJobDataExtractor,
TaskData, find_dataset_root)
from cvat.apps.dataset_manager.task import TaskAnnotation
from cvat.apps.dataset_manager.util import make_zip_archive
@@ -417,7 +417,7 @@ class TaskExportTest(_DbTestBase):
task_ann.init_from_db()
task_data = TaskData(task_ann.ir_data, Task.objects.get(pk=task["id"]))
extractor = CvatTaskDataExtractor(task_data)
extractor = CvatTaskOrJobDataExtractor(task_data)
dm_dataset = datumaro.components.project.Dataset.from_extractors(extractor)
self.assertEqual(4, len(dm_dataset.get("image_1").annotations))
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
@@ -22,7 +23,7 @@ from rest_framework import status
from rest_framework.test import APIClient, APITestCase
import cvat.apps.dataset_manager as dm
from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, TaskData
from cvat.apps.dataset_manager.bindings import CvatTaskOrJobDataExtractor, TaskData
from cvat.apps.dataset_manager.task import TaskAnnotation
from cvat.apps.engine.models import Task
@@ -185,7 +186,7 @@ class _DbTestBase(APITestCase):
task_ann = TaskAnnotation(task_id)
task_ann.init_from_db()
task_data = TaskData(task_ann.ir_data, Task.objects.get(pk=task_id))
extractor = CvatTaskDataExtractor(task_data, include_images=include_images)
extractor = CvatTaskOrJobDataExtractor(task_data, include_images=include_images)
return Dataset.from_extractors(extractor)
def _get_request_with_data(self, path, data, user):
# Copyright (C) 2020-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
@@ -182,6 +183,6 @@ class FrameProvider:
return (frame, self.VIDEO_FRAME_MIME)
return (frame, mimetypes.guess_type(frame_name)[0])
def get_frames(self, quality=Quality.ORIGINAL, out_type=Type.BUFFER):
for idx in range(self._db_data.size):
def get_frames(self, start_frame, stop_frame, quality=Quality.ORIGINAL, out_type=Type.BUFFER):
for idx in range(start_frame, stop_frame):
yield self.get_frame(idx, quality=quality, out_type=out_type)
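The signature change above is what lets dump_media_files request only a job's segment; callers that previously relied on get_frames() walking the whole task now pass explicit bounds. A sketch under assumed variable names (db_data is the engine Data model instance, job_start/job_stop come from the job's data bindings):

    frame_provider = FrameProvider(db_data)
    all_frames = frame_provider.get_frames(0, db_data.size)      # previous default behaviour
    job_frames = frame_provider.get_frames(job_start, job_stop)  # only the job's segment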