未验证 提交 5281e793 编写于 作者: M Maria Khrustaleva 提交者: GitHub

Support any name for a manifest (#4041)

* Initial version

* Fix support 2 versions && fix case for cloud storages

* Fix eslint errors

* tmp

* Fix manifest validation when data hasn't been copied yet

* fix

* Update changelog
上级 f74b6f0d
......@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add "tag" return type for automatic annotation in Nuclio (<https://github.com/openvinotoolkit/cvat/pull/3896>)
- Dataset importing to a project (<https://github.com/openvinotoolkit/cvat/pull/3790>)
- User is able to customize information that text labels show (<https://github.com/openvinotoolkit/cvat/pull/4029>)
- Support for uploading manifest with any name (<https://github.com/openvinotoolkit/cvat/pull/4041>)
### Changed
- TBD
......
......@@ -90,14 +90,13 @@ export class FileManager extends React.PureComponent<Props, State> {
};
}
private loadData = (key: string): Promise<void> =>
new Promise<void>((resolve, reject): void => {
const { onLoadData } = this.props;
private loadData = (key: string): Promise<void> => new Promise<void>((resolve, reject): void => {
const { onLoadData } = this.props;
const success = (): void => resolve();
const failure = (): void => reject();
onLoadData(key, success, failure);
});
const success = (): void => resolve();
const failure = (): void => reject();
onLoadData(key, success, failure);
});
public reset(): void {
const { active } = this.state;
......@@ -161,8 +160,8 @@ export class FileManager extends React.PureComponent<Props, State> {
private renderShareSelector(): JSX.Element {
function renderTreeNodes(data: TreeNodeNormal[]): JSX.Element[] {
// sort alphabetically
data.sort((a: TreeNodeNormal, b: TreeNodeNormal): number =>
a.key.toLocaleString().localeCompare(b.key.toLocaleString()));
data.sort((a: TreeNodeNormal, b: TreeNodeNormal): number => (
a.key.toLocaleString().localeCompare(b.key.toLocaleString())));
return data.map((item: TreeNodeNormal) => {
if (item.children) {
return (
......@@ -205,8 +204,8 @@ export class FileManager extends React.PureComponent<Props, State> {
halfChecked: ReactText[];
},
): void => {
const keys = (checkedKeys as ReactText[]).map((text: ReactText): string =>
text.toLocaleString());
const keys = (checkedKeys as ReactText[]).map((text: ReactText): string => (
text.toLocaleString()));
this.setState({
files: {
...files,
......@@ -267,7 +266,7 @@ export class FileManager extends React.PureComponent<Props, State> {
<CloudStorageTab
formRef={this.cloudStorageTabFormRef}
cloudStorage={cloudStorage}
selectedFiles={files.cloudStorage.filter((item) => !item.endsWith('manifest.jsonl'))}
selectedFiles={files.cloudStorage.filter((item) => !item.endsWith('.jsonl'))}
onSelectCloudStorage={(_cloudStorage: CloudStorage | null) => {
this.setState({ cloudStorage: _cloudStorage });
}}
......
......@@ -24,7 +24,7 @@ from cvat.apps.engine.log import slogger
from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter,
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort)
from cvat.apps.engine.utils import av_scan_paths
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager, is_manifest
from utils.dataset_manifest.core import VideoManifestValidator
from utils.dataset_manifest.utils import detect_related_images
from .cloud_provider import get_cloud_storage_instance, Credentials
......@@ -113,7 +113,7 @@ def _save_task_to_db(db_task):
db_task.data.save()
db_task.save()
def _count_files(data, manifest_file=None):
def _count_files(data, manifest_files=None):
share_root = settings.SHARE_ROOT
server_files = []
......@@ -143,8 +143,8 @@ def _count_files(data, manifest_file=None):
mime = get_mime(full_path)
if mime in counter:
counter[mime].append(rel_path)
elif 'manifest.jsonl' == os.path.basename(rel_path):
manifest_file.append(rel_path)
elif rel_path.endswith('.jsonl'):
manifest_files.append(rel_path)
else:
slogger.glob.warn("Skip '{}' file (its mime type doesn't "
"correspond to supported MIME file type)".format(full_path))
......@@ -163,7 +163,7 @@ def _count_files(data, manifest_file=None):
return counter
def _validate_data(counter, manifest_file=None):
def _validate_data(counter, manifest_files=None):
unique_entries = 0
multiple_entries = 0
for media_type, media_config in MEDIA_TYPES.items():
......@@ -173,7 +173,7 @@ def _validate_data(counter, manifest_file=None):
else:
multiple_entries += len(counter[media_type])
if manifest_file and media_type not in ('video', 'image'):
if manifest_files and media_type not in ('video', 'image'):
raise Exception('File with meta information can only be uploaded with video/images ')
if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1:
......@@ -193,6 +193,16 @@ def _validate_data(counter, manifest_file=None):
return counter, task_modes[0]
def _validate_manifest(manifests, root_dir):
if manifests:
if len(manifests) != 1:
raise Exception('Only one manifest file can be attached with data')
full_manifest_path = os.path.join(root_dir, manifests[0])
if is_manifest(full_manifest_path):
return manifests[0]
raise Exception('Invalid manifest was uploaded')
return None
def _download_data(urls, upload_dir):
job = rq.get_current_job()
local_files = {}
......@@ -233,48 +243,57 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
if data['remote_files'] and not isDatasetImport:
data['remote_files'] = _download_data(data['remote_files'], upload_dir)
manifest_file = []
media = _count_files(data, manifest_file)
media, task_mode = _validate_data(media, manifest_file)
if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")
manifest_files = []
media = _count_files(data, manifest_files)
media, task_mode = _validate_data(media, manifest_files)
if data['server_files']:
if db_data.storage == models.StorageChoice.LOCAL:
_copy_data_from_source(data['server_files'], upload_dir, data.get('server_files_path'))
elif db_data.storage == models.StorageChoice.SHARE:
upload_dir = settings.SHARE_ROOT
else: # cloud storage
if not manifest_file: raise Exception('A manifest file not found')
db_cloud_storage = db_data.cloud_storage
credentials = Credentials()
credentials.convert_from_db({
'type': db_cloud_storage.credentials_type,
'value': db_cloud_storage.credentials,
})
details = {
'resource': db_cloud_storage.resource,
'credentials': credentials,
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
sorted_media = sort(media['image'], data['sorting_method'])
first_sorted_media_image = sorted_media[0]
cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))
# prepare task manifest file from cloud storage manifest file
# NOTE we should create manifest before defining chunk_size
# FIXME in the future when will be implemented archive support
manifest = ImageManifestManager(db_data.get_manifest_path())
cloud_storage_manifest = ImageManifestManager(
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]),
db_data.cloud_storage.get_storage_dirname()
)
cloud_storage_manifest.set_index()
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(sorted_content)
manifest_root = None
if db_data.storage in {models.StorageChoice.LOCAL, models.StorageChoice.SHARE}:
manifest_root = upload_dir
elif db_data.storage == models.StorageChoice.CLOUD_STORAGE:
manifest_root = db_data.cloud_storage.get_storage_dirname()
manifest_file = _validate_manifest(manifest_files, manifest_root)
if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")
if data['server_files'] and db_data.storage == models.StorageChoice.CLOUD_STORAGE:
if not manifest_file: raise Exception('A manifest file not found')
db_cloud_storage = db_data.cloud_storage
credentials = Credentials()
credentials.convert_from_db({
'type': db_cloud_storage.credentials_type,
'value': db_cloud_storage.credentials,
})
details = {
'resource': db_cloud_storage.resource,
'credentials': credentials,
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
sorted_media = sort(media['image'], data['sorting_method'])
first_sorted_media_image = sorted_media[0]
cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))
# prepare task manifest file from cloud storage manifest file
# NOTE we should create manifest before defining chunk_size
# FIXME in the future when will be implemented archive support
manifest = ImageManifestManager(db_data.get_manifest_path())
cloud_storage_manifest = ImageManifestManager(
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file),
db_data.cloud_storage.get_storage_dirname()
)
cloud_storage_manifest.set_index()
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(sorted_content)
av_scan_paths(upload_dir)
......@@ -432,12 +451,12 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
if not media_files:
continue
# replace manifest file (e.g was uploaded 'subdir/manifest.jsonl')
# replace manifest file (e.g was uploaded 'subdir/manifest.jsonl' or 'some_manifest.jsonl')
if manifest_file and not os.path.exists(db_data.get_manifest_path()):
shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
shutil.copyfile(os.path.join(upload_dir, manifest_file),
db_data.get_manifest_path())
if upload_dir != settings.SHARE_ROOT:
os.remove(os.path.join(upload_dir, manifest_file[0]))
os.remove(os.path.join(upload_dir, manifest_file))
if task_mode == MEDIA_TYPES['video']['mode']:
try:
......
......@@ -1359,7 +1359,7 @@ class CloudStorageViewSet(auth.CloudStorageGetQuerySetMixin, viewsets.ModelViewS
storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details)
if not db_storage.manifests.count():
raise Exception('There is no manifest file')
manifest_path = request.query_params.get('manifest_path', 'manifest.jsonl')
manifest_path = request.query_params.get('manifest_path', db_storage.manifests.first().filename)
file_status = storage.get_file_status(manifest_path)
if file_status == Status.NOT_FOUND:
raise FileNotFoundError(errno.ENOENT,
......
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
from .core import VideoManifestManager, ImageManifestManager
\ No newline at end of file
from .core import VideoManifestManager, ImageManifestManager, is_manifest
\ No newline at end of file
......@@ -2,15 +2,18 @@
#
# SPDX-License-Identifier: MIT
from enum import Enum
import av
import json
import os
from abc import ABC, abstractmethod, abstractproperty
from abc import ABC, abstractmethod, abstractproperty, abstractstaticmethod
from contextlib import closing
from tempfile import NamedTemporaryFile
from PIL import Image
from .utils import md5_hash, rotate_image, sort, SortingMethod
from json.decoder import JSONDecodeError
from .utils import SortingMethod, md5_hash, rotate_image, sort
class VideoStreamReader:
def __init__(self, source_path, chunk_size, force):
......@@ -238,8 +241,19 @@ class Dataset3DImagesReader(DatasetImagesReader):
yield dict()
class _Manifest:
class SupportedVersion(str, Enum):
    """Manifest format versions accepted by the structure validators.

    Members are also ``str`` instances, so they compare equal to the raw
    version string stored in a manifest file.
    """

    V1 = '1.0'
    V1_1 = '1.1'

    @classmethod
    def choices(cls):
        """Return a one-shot generator over the raw version strings."""
        def _values():
            for member in cls:
                yield member.value
        return _values()

    def __str__(self):
        # Render as the bare version ('1.1'), not 'SupportedVersion.V1_1'.
        return self.value
FILE_NAME = 'manifest.jsonl'
VERSION = '1.1'
VERSION = SupportedVersion.V1_1
def __init__(self, path, upload_dir=None):
assert path, 'A path to manifest file not found'
......@@ -509,13 +523,6 @@ class VideoManifestManager(_ManifestManager):
def get_subset(self, subset_names):
raise NotImplementedError()
#TODO: add generic manifest structure file validation
class ManifestValidator:
def validate_base_info(self):
with open(self._manifest.path, 'r') as manifest_file:
assert self._manifest.VERSION != json.loads(manifest_file.readline())['version']
assert self._manifest.TYPE != json.loads(manifest_file.readline())['type']
class VideoManifestValidator(VideoManifestManager):
def __init__(self, source_path, manifest_path):
self._source_path = source_path
......@@ -607,12 +614,113 @@ class ImageManifestManager(_ManifestManager):
image_name = f"{image['name']}{image['extension']}"
if image_name in subset_names:
index_list.append(subset_names.index(image_name))
subset.append({
properties = {
'name': f"{image['name']}",
'extension': f"{image['extension']}",
'width': image['width'],
'height': image['height'],
'meta': image['meta'],
'checksum': f"{image['checksum']}"
})
}
for optional_field in {'meta', 'checksum'}:
value = image.get(optional_field)
if value:
properties[optional_field] = value
subset.append(properties)
return index_list, subset
class _BaseManifestValidator(ABC):
def __init__(self, full_manifest_path):
self._manifest = _Manifest(full_manifest_path)
def validate(self):
try:
# we cannot use index in general because manifest may be e.g. in share point with ro mode
with open(self._manifest.path, 'r') as manifest:
for validator in self.validators:
line = json.loads(manifest.readline().strip())
validator(line)
return True
except (ValueError, KeyError, JSONDecodeError):
return False
@staticmethod
def _validate_version(_dict):
if not _dict['version'] in _Manifest.SupportedVersion.choices():
raise ValueError('Incorrect version field')
def _validate_type(self, _dict):
if not _dict['type'] == self.TYPE:
raise ValueError('Incorrect type field')
@abstractproperty
def validators(self):
pass
@abstractstaticmethod
def _validate_first_item(_dict):
pass
class _VideoManifestStructureValidator(_BaseManifestValidator):
    """Structure checks for a manifest whose ``type`` line is 'video'."""

    TYPE = 'video'

    @property
    def validators(self):
        # One validator per leading manifest line, in file order.
        checks = [self._validate_version, self._validate_type]
        checks.append(self._validate_properties)
        checks.append(self._validate_first_item)
        return tuple(checks)

    @staticmethod
    def _validate_properties(_dict):
        """Check the types of name/resolution/length under ``properties``."""
        props = _dict['properties']
        expected_types = (('name', str), ('resolution', list), ('length', int))
        for field, expected in expected_types:
            if not isinstance(props[field], expected):
                raise ValueError(f'Incorrect {field} field')
        # A zero-length video is rejected as well.
        if props['length'] == 0:
            raise ValueError('Incorrect length field')

    @staticmethod
    def _validate_first_item(_dict):
        """Check that the first frame record has integer number and pts."""
        for field in ('number', 'pts'):
            if not isinstance(_dict[field], int):
                raise ValueError(f'Incorrect {field} field')
class _DatasetManifestStructureValidator(_BaseManifestValidator):
    """Structure checks for a manifest whose ``type`` line is 'images'."""

    TYPE = 'images'

    @property
    def validators(self):
        # One validator per leading manifest line, in file order.
        checks = [self._validate_version, self._validate_type]
        checks.append(self._validate_first_item)
        return tuple(checks)

    @staticmethod
    def _validate_first_item(_dict):
        """Check the field types of the first image record."""
        # width and height are required for 2d data
        # FIXME for 3d when manual preparation of the manifest will be implemented
        expected_types = (
            ('name', str),
            ('extension', str),
            ('width', int),
            ('height', int),
        )
        for field, expected in expected_types:
            if not isinstance(_dict[field], expected):
                raise ValueError(f'Incorrect {field} field')
def is_manifest(full_manifest_path):
    """Tell whether the file passes the video or the images structure check.

    The video check runs first; the images check runs only if it fails.
    """
    structure_checks = (_is_video_manifest, _is_dataset_manifest)
    return any(check(full_manifest_path) for check in structure_checks)
def _is_video_manifest(full_manifest_path):
    """Return the result of the video-manifest structure validation."""
    return _VideoManifestStructureValidator(full_manifest_path).validate()
def _is_dataset_manifest(full_manifest_path):
    """Return the result of the images-manifest structure validation."""
    return _DatasetManifestStructureValidator(full_manifest_path).validate()
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册