未验证 提交 68fbcdec 编写于 作者: M Maria Khrustaleva 提交者: GitHub

Added sorting methods (#3937)

上级 2ed9f9cd
......@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Google Cloud Storage support in UI (<https://github.com/openvinotoolkit/cvat/pull/3919>)
- Add project tasks paginations (<https://github.com/openvinotoolkit/cvat/pull/3910>)
- Add remove issue button (<https://github.com/openvinotoolkit/cvat/pull/3952>)
- Data sorting option (<https://github.com/openvinotoolkit/cvat/pull/3937>)
- Options to change font size & position of text labels on the canvas (<https://github.com/openvinotoolkit/cvat/pull/3972>)
- Add "tag" return type for automatic annotation in Nuclio (<https://github.com/openvinotoolkit/cvat/pull/3896>)
......
{
"name": "cvat-core",
"version": "3.20.1",
"version": "3.21.0",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "cvat-core",
"version": "3.20.1",
"version": "3.21.0",
"license": "MIT",
"dependencies": {
"axios": "^0.21.4",
......
{
"name": "cvat-core",
"version": "3.20.1",
"version": "3.21.0",
"description": "Part of Computer Vision Tool which presents an interface for client-side integration",
"main": "babel.config.js",
"scripts": {
......
......@@ -367,6 +367,24 @@
KEY_FILE_PATH: 'KEY_FILE_PATH',
});
/**
* Sorting methods
* @enum {string}
* @name SortingMethod
* @memberof module:API.cvat.enums
* @property {string} LEXICOGRAPHICAL 'lexicographical'
* @property {string} NATURAL 'natural'
* @property {string} PREDEFINED 'predefined'
* @property {string} RANDOM 'random'
* @readonly
*/
// Enum of image sorting methods; each member name maps to the lowercase
// string value expected by the server API. Frozen so it cannot be mutated.
const SortingMethod = Object.freeze(
    Object.fromEntries(
        ['LEXICOGRAPHICAL', 'NATURAL', 'PREDEFINED', 'RANDOM'].map((name) => [name, name.toLowerCase()]),
    ),
);
module.exports = {
ShareFileType,
TaskStatus,
......@@ -384,5 +402,6 @@
DimensionType,
CloudStorageProviderType,
CloudStorageCredentialsType,
SortingMethod,
};
})();
......@@ -1017,6 +1017,7 @@
copy_data: undefined,
dimension: undefined,
cloud_storage_id: undefined,
sorting_method: undefined,
};
const updatedFields = new FieldUpdateTrigger({
......@@ -1549,6 +1550,16 @@
cloudStorageId: {
get: () => data.cloud_storage_id,
},
sortingMethod: {
/**
* @name sortingMethod
* @type {module:API.cvat.enums.SortingMethod}
* @memberof module:API.cvat.classes.Task
* @instance
* @readonly
*/
get: () => data.sorting_method,
},
_internalData: {
get: () => data,
},
......@@ -2061,6 +2072,7 @@
image_quality: this.imageQuality,
use_zip_chunks: this.useZipChunks,
use_cache: this.useCache,
sorting_method: this.sortingMethod,
};
if (typeof this.startFrame !== 'undefined') {
......
{
"name": "cvat-ui",
"version": "1.28.1",
"version": "1.28.2",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "cvat-ui",
"version": "1.28.1",
"version": "1.28.2",
"license": "MIT",
"dependencies": {
"@ant-design/icons": "^4.6.3",
......
{
"name": "cvat-ui",
"version": "1.28.1",
"version": "1.28.2",
"description": "CVAT single-page application",
"main": "src/index.tsx",
"scripts": {
......
......@@ -353,6 +353,7 @@ export function createTaskAsync(data: any): ThunkAction<Promise<void>, {}, {}, A
image_quality: 70,
use_zip_chunks: data.advanced.useZipChunks,
use_cache: data.advanced.useCache,
sorting_method: data.advanced.sortingMethod,
};
if (data.projectId) {
......
......@@ -7,6 +7,7 @@ import { Row, Col } from 'antd/lib/grid';
import { PercentageOutlined } from '@ant-design/icons';
import Input from 'antd/lib/input';
import Select from 'antd/lib/select';
import Radio from 'antd/lib/radio';
import Checkbox from 'antd/lib/checkbox';
import Form, { FormInstance, RuleObject, RuleRender } from 'antd/lib/form';
import Text from 'antd/lib/typography/Text';
......@@ -16,6 +17,13 @@ import patterns from 'utils/validation-patterns';
const { Option } = Select;
// Image sorting methods offered in the task-creation form. The string values
// must match the SortingMethod enum exposed by cvat-core and the server-side
// models, since they are sent as-is in the `sorting_method` field.
export enum SortingMethod {
    LEXICOGRAPHICAL = 'lexicographical',
    NATURAL = 'natural',
    PREDEFINED = 'predefined',
    RANDOM = 'random',
}
export interface AdvancedConfiguration {
bugTracker?: string;
imageQuality?: number;
......@@ -31,6 +39,7 @@ export interface AdvancedConfiguration {
dataChunkSize?: number;
useCache: boolean;
copyData?: boolean;
sortingMethod: SortingMethod;
}
const initialValues: AdvancedConfiguration = {
......@@ -39,6 +48,7 @@ const initialValues: AdvancedConfiguration = {
useZipChunks: true,
useCache: true,
copyData: false,
sortingMethod: SortingMethod.LEXICOGRAPHICAL,
};
interface Props {
......@@ -178,6 +188,33 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
);
}
private renderSortingMethodRadio(): JSX.Element {
    // Pairs of (form value, visible caption), in display order.
    const radioOptions: [SortingMethod, string][] = [
        [SortingMethod.LEXICOGRAPHICAL, 'Lexicographical'],
        [SortingMethod.NATURAL, 'Natural'],
        [SortingMethod.PREDEFINED, 'Predefined'],
        [SortingMethod.RANDOM, 'Random'],
    ];

    return (
        <Form.Item
            label='Sorting method'
            name='sortingMethod'
            rules={[
                {
                    required: true,
                    message: 'The field is required.',
                },
            ]}
            help='Specify how to sort images. It is not relevant for videos.'
        >
            <Radio.Group>
                {radioOptions.map(([value, label]) => (
                    <Radio value={value} key={value}>
                        {label}
                    </Radio>
                ))}
            </Radio.Group>
        </Form.Item>
    );
}
private renderImageQuality(): JSX.Element {
return (
<CVATTooltip title='Defines images compression level'>
......@@ -290,8 +327,7 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
>
<Select style={{ width: '100%' }} initialValue='CVAT for video 1.1'>
{
dumpers.map((dumper: any) =>
<Option value={dumper.name}>{dumper.name}</Option>)
dumpers.map((dumper: any) => <Option value={dumper.name}>{dumper.name}</Option>)
}
</Select>
</Form.Item>
......@@ -384,6 +420,9 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
const { installedGit, activeFileManagerTab } = this.props;
return (
<Form initialValues={initialValues} ref={this.formRef} layout='vertical'>
<Row>
<Col>{this.renderSortingMethodRadio()}</Col>
</Row>
{activeFileManagerTab === 'share' ? (
<Row>
<Col>{this.renderCopyDataChechbox()}</Col>
......
......@@ -20,7 +20,7 @@ import { Files } from 'components/file-manager/file-manager';
import BasicConfigurationForm, { BaseConfiguration } from './basic-configuration-form';
import ProjectSearchField from './project-search-field';
import ProjectSubsetField from './project-subset-field';
import AdvancedConfigurationForm, { AdvancedConfiguration } from './advanced-configuration-form';
import AdvancedConfigurationForm, { AdvancedConfiguration, SortingMethod } from './advanced-configuration-form';
export interface CreateTaskData {
projectId: number | null;
......@@ -54,6 +54,7 @@ const defaultState = {
lfs: false,
useZipChunks: true,
useCache: true,
sortingMethod: SortingMethod.LEXICOGRAPHICAL,
},
labels: [],
files: {
......
......@@ -65,6 +65,7 @@ class _TaskBackupBase():
'chunk_type',
'storage_method',
'storage',
'sorting_method',
}
self._prepare_meta(allowed_fields, data)
......
......@@ -14,11 +14,13 @@ from contextlib import closing
import av
import numpy as np
from natsort import os_sorted
from pyunpack import Archive
from PIL import Image, ImageFile
from random import shuffle
import open3d as o3d
from cvat.apps.engine.utils import rotate_image
from cvat.apps.engine.models import DimensionType
from cvat.apps.engine.models import DimensionType, SortingMethod
# fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web
# see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file
......@@ -47,9 +49,22 @@ def files_to_ignore(directory):
return True
return False
def sort(images, sorting_method=SortingMethod.LEXICOGRAPHICAL, func=None):
    """Return `images` ordered according to `sorting_method`.

    Args:
        images: sequence of items (typically file paths) to order.
        sorting_method: one of the SortingMethod members.
        func: optional key function, used by the lexicographical and
            natural orderings only.

    Returns:
        The ordered sequence. PREDEFINED returns `images` unchanged.

    Raises:
        NotImplementedError: for an unknown sorting method.
    """
    if sorting_method == SortingMethod.LEXICOGRAPHICAL:
        return sorted(images, key=func)
    elif sorting_method == SortingMethod.NATURAL:
        return os_sorted(images, key=func)
    elif sorting_method == SortingMethod.PREDEFINED:
        return images
    elif sorting_method == SortingMethod.RANDOM:
        # Shuffle a copy: random.shuffle works in place, and mutating the
        # caller's list (or failing on a tuple) would be a surprising side
        # effect — every branch now leaves the input untouched.
        shuffled = list(images)
        shuffle(shuffled)
        return shuffled
    else:
        raise NotImplementedError()
class IMediaReader(ABC):
def __init__(self, source_path, step, start, stop, dimension):
self._source_path = sorted(source_path)
self._source_path = source_path
self._step = step
self._start = start
self._stop = stop
......@@ -90,7 +105,13 @@ class IMediaReader(ABC):
return range(self._start, self._stop, self._step)
class ImageListReader(IMediaReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D):
def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
if not source_path:
raise Exception('No image found')
......@@ -102,13 +123,15 @@ class ImageListReader(IMediaReader):
assert stop > start
super().__init__(
source_path=source_path,
source_path=sort(source_path, sorting_method),
step=step,
start=start,
stop=stop,
dimension=dimension
)
self._sorting_method = sorting_method
def __iter__(self):
for i in range(self._start, self._stop, self._step):
yield (self.get_image(i), self.get_path(i), i)
......@@ -121,7 +144,8 @@ class ImageListReader(IMediaReader):
step=self._step,
start=self._start,
stop=self._stop,
dimension=self._dimension
dimension=self._dimension,
sorting_method=self._sorting_method
)
def get_path(self, i):
......@@ -154,7 +178,8 @@ class ImageListReader(IMediaReader):
source_path=source_files,
step=step,
start=start,
stop=stop
stop=stop,
sorting_method=self._sorting_method,
)
self._dimension = dimension
......@@ -163,7 +188,13 @@ class ImageListReader(IMediaReader):
return [self.get_path(idx) for idx, _ in enumerate(self._source_path)]
class DirectoryReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D):
def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
image_paths = []
for source in source_path:
for root, _, files in os.walk(source):
......@@ -176,10 +207,17 @@ class DirectoryReader(ImageListReader):
start=start,
stop=stop,
dimension=dimension,
sorting_method=sorting_method,
)
class ArchiveReader(DirectoryReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D):
def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
self._archive_source = source_path[0]
extract_dir = source_path[1] if len(source_path) > 1 else os.path.dirname(source_path[0])
Archive(self._archive_source).extractall(extract_dir)
......@@ -190,11 +228,18 @@ class ArchiveReader(DirectoryReader):
step=step,
start=start,
stop=stop,
dimension=dimension
dimension=dimension,
sorting_method=sorting_method,
)
class PdfReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D):
def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
if not source_path:
raise Exception('No PDF found')
......@@ -223,14 +268,26 @@ class PdfReader(ImageListReader):
start=start,
stop=stop,
dimension=dimension,
sorting_method=sorting_method,
)
class ZipReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D):
def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
self._zip_source = zipfile.ZipFile(source_path[0], mode='r')
self.extract_dir = source_path[1] if len(source_path) > 1 else None
file_list = [f for f in self._zip_source.namelist() if files_to_ignore(f) and get_mime(f) == 'image']
super().__init__(file_list, step=step, start=start, stop=stop, dimension=dimension)
super().__init__(file_list,
step=step,
start=start,
stop=stop,
dimension=dimension,
sorting_method=sorting_method)
def __del__(self):
self._zip_source.close()
......
# Generated by Django 3.1.13 on 2021-12-03 08:06
import cvat.apps.engine.models
from django.db import migrations, models
class Migration(migrations.Migration):
    """Adds the Data.sorting_method column: how a task's images are ordered."""

    dependencies = [
        ('engine', '0044_auto_20211123_0824'),
    ]

    operations = [
        migrations.AddField(
            model_name='data',
            name='sorting_method',
            # Stored as the lowercase string value. Defaults to lexicographical
            # ordering, which matches how media was sorted before this field
            # existed, so pre-existing tasks keep their frame order.
            field=models.CharField(choices=[('lexicographical', 'LEXICOGRAPHICAL'), ('natural', 'NATURAL'), ('predefined', 'PREDEFINED'), ('random', 'RANDOM')], default=cvat.apps.engine.models.SortingMethod['LEXICOGRAPHICAL'], max_length=15),
        ),
    ]
......@@ -81,6 +81,19 @@ class StorageChoice(str, Enum):
def __str__(self):
return self.value
class SortingMethod(str, Enum):
    """Ways to order the images of a task's dataset."""
    LEXICOGRAPHICAL = 'lexicographical'
    NATURAL = 'natural'
    PREDEFINED = 'predefined'
    RANDOM = 'random'

    @classmethod
    def choices(cls):
        # (db value, member name) tuples for Django's `choices=` argument.
        return tuple((x.value, x.name) for x in cls)

    def __str__(self):
        # Serialize to the raw string value (used when rendering/storing).
        return self.value
class Data(models.Model):
chunk_size = models.PositiveIntegerField(null=True)
size = models.PositiveIntegerField(default=0)
......@@ -95,6 +108,7 @@ class Data(models.Model):
storage_method = models.CharField(max_length=15, choices=StorageMethodChoice.choices(), default=StorageMethodChoice.FILE_SYSTEM)
storage = models.CharField(max_length=15, choices=StorageChoice.choices(), default=StorageChoice.LOCAL)
cloud_storage = models.ForeignKey('CloudStorage', on_delete=models.SET_NULL, null=True, related_name='data')
sorting_method = models.CharField(max_length=15, choices=SortingMethod.choices(), default=SortingMethod.LEXICOGRAPHICAL)
class Meta:
default_permissions = ()
......
......@@ -281,7 +281,7 @@ class DataSerializer(serializers.ModelSerializer):
model = models.Data
fields = ('chunk_size', 'size', 'image_quality', 'start_frame', 'stop_frame', 'frame_filter',
'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks',
'cloud_storage_id', 'use_cache', 'copy_data', 'storage_method', 'storage')
'cloud_storage_id', 'use_cache', 'copy_data', 'storage_method', 'storage', 'sorting_method')
# pylint: disable=no-self-use
def validate_frame_filter(self, value):
......@@ -308,9 +308,9 @@ class DataSerializer(serializers.ModelSerializer):
client_files = validated_data.pop('client_files')
server_files = validated_data.pop('server_files')
remote_files = validated_data.pop('remote_files')
validated_data.pop('use_zip_chunks')
validated_data.pop('use_cache')
validated_data.pop('copy_data')
for extra_key in { 'use_zip_chunks', 'use_cache', 'copy_data' }:
validated_data.pop(extra_key)
db_data = models.Data.objects.create(**validated_data)
data_path = db_data.get_data_dirname()
......
......@@ -22,7 +22,7 @@ from django.db import transaction
from cvat.apps.engine import models
from cvat.apps.engine.log import slogger
from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter,
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime)
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort)
from cvat.apps.engine.utils import av_scan_paths
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
from utils.dataset_manifest.core import VideoManifestValidator
......@@ -123,15 +123,18 @@ def _count_files(data, manifest_file=None):
raise ValueError("Bad file path: " + path)
server_files.append(path)
server_files.sort(reverse=True)
sorted_server_files = sorted(server_files, reverse=True)
# The idea of the code is trivial. After sort we will have files in the
# following order: 'a/b/c/d/2.txt', 'a/b/c/d/1.txt', 'a/b/c/d', 'a/b/c'
# Let's keep all items which aren't substrings of the previous item. In
# the example above only 2.txt and 1.txt files will be in the final list.
# Also need to correctly handle 'a/b/c0', 'a/b/c' case.
data['server_files'] = [v[1] for v in zip([""] + server_files, server_files)
without_extra_dirs = [v[1] for v in zip([""] + sorted_server_files, sorted_server_files)
if not os.path.dirname(v[0]).startswith(v[1])]
# we need to keep the original sequence of files
data['server_files'] = [f for f in server_files if f in without_extra_dirs]
def count_files(file_mapping, counter):
for rel_path, full_path in file_mapping.items():
mime = get_mime(full_path)
......@@ -141,7 +144,7 @@ def _count_files(data, manifest_file=None):
manifest_file.append(rel_path)
else:
slogger.glob.warn("Skip '{}' file (its mime type doesn't "
"correspond to a video or an image file)".format(full_path))
"correspond to supported MIME file type)".format(full_path))
counter = { media_type: [] for media_type in MEDIA_TYPES.keys() }
......@@ -213,6 +216,7 @@ def _download_data(urls, upload_dir):
def _get_manifest_frame_indexer(start_frame=0, frame_step=1):
return lambda frame_id: start_frame + frame_id * frame_step
@transaction.atomic
def _create_thread(tid, data, isImport=False):
slogger.glob.info("create task #{}".format(tid))
......@@ -222,15 +226,13 @@ def _create_thread(tid, data, isImport=False):
upload_dir = db_data.get_upload_dirname()
if data['remote_files']:
if db_data.storage != models.StorageChoice.CLOUD_STORAGE:
data['remote_files'] = _download_data(data['remote_files'], upload_dir)
data['remote_files'] = _download_data(data['remote_files'], upload_dir)
manifest_file = []
media = _count_files(data, manifest_file)
media, task_mode = _validate_data(media, manifest_file)
if manifest_file:
assert settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE, \
"File with meta information can be uploaded if 'Use cache' option is also selected"
if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")
if data['server_files']:
if db_data.storage == models.StorageChoice.LOCAL:
......@@ -252,19 +254,22 @@ def _create_thread(tid, data, isImport=False):
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
first_sorted_media_image = sorted(media['image'])[0]
sorted_media = sort(media['image'], data['sorting_method'])
first_sorted_media_image = sorted_media[0]
cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))
# prepare task manifest file from cloud storage manifest file
# NOTE we should create manifest before defining chunk_size
# FIXME in the future when will be implemented archive support
manifest = ImageManifestManager(db_data.get_manifest_path())
cloud_storage_manifest = ImageManifestManager(
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]),
db_data.cloud_storage.get_storage_dirname()
)
cloud_storage_manifest.set_index()
media_files = sorted(media['image'])
content = cloud_storage_manifest.get_subset(media_files)
manifest.create(content)
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(sorted_content)
av_scan_paths(upload_dir)
......@@ -292,24 +297,48 @@ def _create_thread(tid, data, isImport=False):
if media_files:
if extractor is not None:
raise Exception('Combined data types are not supported')
source_paths=[os.path.join(upload_dir, f) for f in media_files]
if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
source_paths.append(db_data.get_upload_dirname())
upload_dir = db_data.get_upload_dirname()
db_data.storage = models.StorageChoice.LOCAL
if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step())
db_data.start_frame = 0
data['stop_frame'] = None
db_data.frame_filter = ''
if isImport and media_type != 'video' and db_data.storage_method == models.StorageMethodChoice.CACHE:
# we should sort media_files according to the manifest content sequence
manifest = ImageManifestManager(db_data.get_manifest_path())
manifest.set_index()
sorted_media_files = []
for idx in range(len(media_files)):
properties = manifest[manifest_index(idx)]
image_name = properties.get('name', None)
image_extension = properties.get('extension', None)
full_image_path = f"{image_name}{image_extension}" if image_name and image_extension else None
if full_image_path and full_image_path in media_files:
sorted_media_files.append(full_image_path)
media_files = sorted_media_files.copy()
del sorted_media_files
data['sorting_method'] = models.SortingMethod.PREDEFINED
source_paths=[os.path.join(upload_dir, f) for f in media_files]
if manifest_file and not isImport and data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
raise Exception("It isn't supported to upload manifest file and use random sorting")
if isImport and db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM and \
data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
raise Exception("It isn't supported to import the task that was created without cache but with random/predefined sorting")
extractor = MEDIA_TYPES[media_type]['extractor'](
source_path=source_paths,
step=db_data.get_frame_step(),
start=db_data.start_frame,
stop=data['stop_frame'],
)
if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
source_paths.append(db_data.get_upload_dirname())
upload_dir = db_data.get_upload_dirname()
db_data.storage = models.StorageChoice.LOCAL
details = {
'source_path': source_paths,
'step': db_data.get_frame_step(),
'start': db_data.start_frame,
'stop': data['stop_frame'],
}
if media_type != 'video':
details['sorting_method'] = data['sorting_method']
extractor = MEDIA_TYPES[media_type]['extractor'](**details)
validate_dimension = ValidateDimension()
if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
......@@ -474,8 +503,12 @@ def _create_thread(tid, data, isImport=False):
chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
img_sizes = []
for _, frame_id in chunk_paths:
for chunk_path, frame_id in chunk_paths:
properties = manifest[manifest_index(frame_id)]
# check mapping
if not chunk_path.endswith(f"{properties['name']}{properties['extension']}"):
raise Exception('Incorrect file mapping to manifest content')
if db_task.dimension == models.DimensionType.DIM_2D:
resolution = (properties['width'], properties['height'])
else:
......
......@@ -30,9 +30,9 @@ from rest_framework.test import APIClient, APITestCase
from datumaro.util.test_utils import TestDir
from cvat.apps.engine.models import (AttributeSpec, AttributeType, Data, Job, Project,
Segment, StatusChoice, Task, Label, StorageMethodChoice, StorageChoice)
from cvat.apps.engine.media_extractors import ValidateDimension
from cvat.apps.engine.models import DimensionType
Segment, StatusChoice, Task, Label, StorageMethodChoice, StorageChoice, DimensionType,
SortingMethod)
from cvat.apps.engine.media_extractors import ValidateDimension, sort
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
def create_db_users(cls):
......@@ -2169,17 +2169,29 @@ class TaskImportExportAPITestCase(APITestCase):
with open(path, "wb") as image:
image.write(data.read())
cls.media_data.append(
{
**{"image_quality": 75,
"copy_data": True,
"start_frame": 2,
"stop_frame": 9,
"frame_filter": "step=2",
},
**{"server_files[{}]".format(i): imagename_pattern.format(i) for i in range(image_count)},
}
)
data = {
"image_quality": 75,
"copy_data": True,
"start_frame": 2,
"stop_frame": 9,
"frame_filter": "step=2",
**{"server_files[{}]".format(i): imagename_pattern.format(i) for i in range(image_count)},
}
use_cache_data = {
**data,
'use_cache': True,
}
cls.media_data.append(data)
data['sorting_method'] = SortingMethod.NATURAL
cls.media_data.append(data)
cls.media_data.append(use_cache_data)
use_cache_data['sorting_method'] = SortingMethod.NATURAL
cls.media_data.append(use_cache_data)
use_cache_data['sorting_method'] = SortingMethod.RANDOM
cls.media_data.append(use_cache_data)
filename = "test_video_1.mp4"
path = os.path.join(settings.SHARE_ROOT, filename)
......@@ -2267,13 +2279,47 @@ class TaskImportExportAPITestCase(APITestCase):
}
)
data = {
"client_files[0]": generate_image_file("test_1.jpg")[1],
"client_files[1]": generate_image_file("test_2.jpg")[1],
"client_files[2]": generate_image_file("test_10.jpg")[1],
"client_files[3]": generate_image_file("test_3.jpg")[1],
"image_quality": 75,
}
use_cache_data = {
**data,
'use_cache': True,
}
cls.media_data.extend([
# image list local
# sorted data
# natural: test_1.jpg, test_2.jpg, test_3.jpg, test_10.jpg
{
"client_files[0]": generate_image_file("test_1.jpg")[1],
"client_files[1]": generate_image_file("test_2.jpg")[1],
"client_files[2]": generate_image_file("test_3.jpg")[1],
"image_quality": 75,
**use_cache_data,
'sorting_method': SortingMethod.NATURAL,
},
{
**data,
'sorting_method': SortingMethod.NATURAL,
},
# random
{
**use_cache_data,
'sorting_method': SortingMethod.RANDOM,
},
# predefined: test_1.jpg, test_2.jpg, test_10.jpg, test_3.jpg
{
**use_cache_data,
'sorting_method': SortingMethod.PREDEFINED,
},
# lexicographical: test_1.jpg, test_10.jpg, test_2.jpg, test_3.jpg
{
**use_cache_data,
'sorting_method': SortingMethod.LEXICOGRAPHICAL,
},
{
**data,
'sorting_method': SortingMethod.LEXICOGRAPHICAL,
},
# video local
{
......@@ -2576,7 +2622,7 @@ def generate_manifest_file(data_type, manifest_path, sources):
kwargs = {
'images': {
'sources': sources,
'is_sorted': False,
'sorting_method': SortingMethod.LEXICOGRAPHICAL,
},
'video': {
'media_file': sources[0],
......@@ -2633,6 +2679,13 @@ class TaskDataAPITestCase(APITestCase):
image.write(data.read())
cls._image_sizes[filename] = img_size
filename = "test_10.jpg"
path = os.path.join(settings.SHARE_ROOT, filename)
img_size, data = generate_image_file(filename)
with open(path, "wb") as image:
image.write(data.read())
cls._image_sizes[filename] = img_size
filename = os.path.join("data", "test_3.jpg")
path = os.path.join(settings.SHARE_ROOT, filename)
os.makedirs(os.path.dirname(path))
......@@ -2732,6 +2785,9 @@ class TaskDataAPITestCase(APITestCase):
path = os.path.join(settings.SHARE_ROOT, "test_3.jpg")
os.remove(path)
path = os.path.join(settings.SHARE_ROOT, "test_10.jpg")
os.remove(path)
path = os.path.join(settings.SHARE_ROOT, "data", "test_3.jpg")
os.remove(path)
......@@ -2892,9 +2948,9 @@ class TaskDataAPITestCase(APITestCase):
client_files = [img for key, img in data.items() if key.startswith("client_files")]
if server_files:
source_files = [os.path.join(settings.SHARE_ROOT, f) for f in sorted(server_files)]
source_files = [os.path.join(settings.SHARE_ROOT, f) for f in sort(server_files, data.get('sorting_method', SortingMethod.LEXICOGRAPHICAL))]
else:
source_files = [f for f in sorted(client_files, key=lambda e: e.name)]
source_files = [f for f in sort(client_files, data.get('sorting_method', SortingMethod.LEXICOGRAPHICAL), func=lambda e: e.name)]
source_images = []
for f in source_files:
......@@ -3128,7 +3184,7 @@ class TaskDataAPITestCase(APITestCase):
image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL)
task_spec = {
"name": "cached images task without copying #16",
"name": "cached images task with default sorting data and without copying #16",
"overlap": 0,
"segment_size": 0,
"labels": [
......@@ -3140,14 +3196,14 @@ class TaskDataAPITestCase(APITestCase):
task_data = {
"server_files[0]": "test_1.jpg",
"server_files[1]": "test_2.jpg",
"server_files[2]": "test_3.jpg",
"server_files[2]": "test_10.jpg",
"image_quality": 70,
"use_cache": True,
}
image_sizes = [
self._image_sizes[task_data["server_files[0]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[2]"]],
self._image_sizes[task_data["server_files[1]"]],
]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
......@@ -3381,6 +3437,44 @@ class TaskDataAPITestCase(APITestCase):
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL)
# test predefined sorting
task_spec.update([('name', 'task custom data sequence #28')])
task_data = {
"server_files[0]": "test_1.jpg",
"server_files[1]": "test_3.jpg",
"server_files[2]": "test_2.jpg",
"image_quality": 70,
"use_cache": True,
"sorting_method": SortingMethod.PREDEFINED
}
image_sizes = [
self._image_sizes[task_data["server_files[0]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[2]"]],
]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE)
# test a natural data sequence
task_spec.update([('name', 'task native data sequence #29')])
task_data = {
"server_files[0]": "test_10.jpg",
"server_files[1]": "test_2.jpg",
"server_files[2]": "test_1.jpg",
"image_quality": 70,
"use_cache": True,
"sorting_method": SortingMethod.NATURAL
}
image_sizes = [
self._image_sizes[task_data["server_files[2]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[0]"]],
]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE)
def test_api_v1_tasks_id_data_admin(self):
self._test_api_v1_tasks_id_data(self.admin)
......
......@@ -105,4 +105,4 @@ def parse_specific_attributes(specific_attributes):
return {
item.split('=')[0].strip(): item.split('=')[1].strip()
for item in specific_attributes.split('&')
} if specific_attributes else dict()
\ No newline at end of file
} if specific_attributes else dict()
......@@ -637,9 +637,8 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet):
db_task.data = db_data
db_task.save()
data = {k:v for k, v in serializer.data.items()}
data['use_zip_chunks'] = serializer.validated_data['use_zip_chunks']
data['use_cache'] = serializer.validated_data['use_cache']
data['copy_data'] = serializer.validated_data['copy_data']
for extra_key in { 'use_zip_chunks', 'use_cache', 'copy_data' }:
data[extra_key] = serializer.validated_data[extra_key]
if data['use_cache']:
db_task.data.storage_method = StorageMethodChoice.CACHE
db_task.data.save(update_fields=['storage_method'])
......
......@@ -55,3 +55,4 @@ google-cloud-storage==1.42.0
# when pycocotools is installed by wheel in python 3.8+
datumaro==0.2.0 --no-binary=datumaro --no-binary=pycocotools
urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability
natsort==8.0.0
......@@ -125,7 +125,15 @@ To create a 3D task, you need to use the following directory structures:
## Advanced configuration
![](/images/image128_use_cache.jpg)
![](/images/image128.jpg)
### Sorting method
Option to sort the data. It is not relevant for videos.
For example, the sequence `2.jpeg, 10.jpeg, 1.jpeg` after sorting will be:
- `lexicographical`: 1.jpeg, 10.jpeg, 2.jpeg
- `natural`: 1.jpeg, 2.jpeg, 10.jpeg
- `predefined`: 2.jpeg, 10.jpeg, 1.jpeg
### Use zip chunks
......
此差异由.gitattributes 抑制。
......@@ -45,6 +45,8 @@ class CLI():
data['copy_data'] = kwargs.get('copy_data')
if 'use_cache' in kwargs:
data['use_cache'] = kwargs.get('use_cache')
if 'sorting_method' in kwargs:
data['sorting_method'] = kwargs.get('sorting_method')
response = self.session.post(url, data=data, files=files)
response.raise_for_status()
......
......@@ -208,6 +208,13 @@ task_create_parser.add_argument(
action='store_false',
help='''set the option to use the cache (default: %(default)s)'''
)
# CLI option selecting how uploaded images are ordered within the task;
# the accepted values mirror the server-side SortingMethod enum.
task_create_parser.add_argument(
    '--sorting-method',
    default='lexicographical',
    choices=['lexicographical', 'natural', 'predefined', 'random'],
    # fixed typo in the user-visible help text: "soring" -> "sorting"
    help='''data sorting method (default: %(default)s)'''
)
#######################################################################
# Delete
#######################################################################
......
......@@ -10,7 +10,7 @@ from contextlib import closing
from tempfile import NamedTemporaryFile
from PIL import Image
from .utils import md5_hash, rotate_image
from .utils import md5_hash, rotate_image, sort, SortingMethod
class VideoStreamReader:
def __init__(self, source_path, chunk_size, force):
......@@ -146,14 +146,14 @@ class DatasetImagesReader:
def __init__(self,
sources,
meta=None,
is_sorted=True,
sorting_method=SortingMethod.PREDEFINED,
use_image_hash=False,
start = 0,
step = 1,
stop = None,
*args,
**kwargs):
self._sources = sources if is_sorted else sorted(sources)
self._sources = sort(sources, sorting_method)
self._meta = meta
self._data_dir = kwargs.get('data_dir', None)
self._use_image_hash = use_image_hash
......@@ -601,11 +601,18 @@ class ImageManifestManager(_ManifestManager):
return (f"{image['name']}{image['extension']}" for _, image in self)
def get_subset(self, subset_names):
return ({
'name': f"{image['name']}",
'extension': f"{image['extension']}",
'width': image['width'],
'height': image['height'],
'meta': image['meta'],
'checksum': f"{image['checksum']}"
} for _, image in self if f"{image['name']}{image['extension']}" in subset_names)
index_list = []
subset = []
for _, image in self:
image_name = f"{image['name']}{image['extension']}"
if image_name in subset_names:
index_list.append(subset_names.index(image_name))
subset.append({
'name': f"{image['name']}",
'extension': f"{image['extension']}",
'width': image['width'],
'height': image['height'],
'meta': image['meta'],
'checksum': f"{image['checksum']}"
})
return index_list, subset
......@@ -17,6 +17,8 @@ def get_args():
'if by default the video does not meet the requirements and a manifest file is not prepared')
parser.add_argument('--output-dir',type=str, help='Directory where the manifest file will be saved',
default=os.getcwd())
parser.add_argument('--sorting', choices=['lexicographical', 'natural', 'predefined', 'random'],
type=str, default='lexicographical')
parser.add_argument('source', type=str, help='Source paths')
return parser.parse_args()
......@@ -63,7 +65,7 @@ def main():
try:
assert len(sources), 'A images was not found'
manifest = ImageManifestManager(manifest_path=manifest_directory)
manifest.link(sources=sources, meta=meta, is_sorted=False,
manifest.link(sources=sources, meta=meta, sorting_method=args.sorting,
use_image_hash=True, data_dir=data_dir)
manifest.create(_tqdm=tqdm)
except Exception as ex:
......
av==8.0.2 --no-binary=av
opencv-python-headless==4.4.0.42
Pillow==7.2.0
tqdm==4.58.0
\ No newline at end of file
tqdm==4.58.0
natsort==8.0.0
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
import os
import re
import hashlib
import mimetypes
import cv2 as cv
from av import VideoFrame
from enum import Enum
from natsort import os_sorted
from random import shuffle
def rotate_image(image, angle):
height, width = image.shape[:2]
......@@ -187,3 +191,29 @@ def detect_related_images(image_paths, root_path):
elif data_are_3d:
return _detect_related_images_3D(image_paths, root_path)
return {}
class SortingMethod(str, Enum):
    """Ways to order a sequence of image files in a dataset manifest."""
    LEXICOGRAPHICAL = 'lexicographical'
    NATURAL = 'natural'
    PREDEFINED = 'predefined'
    RANDOM = 'random'

    @classmethod
    def choices(cls):
        # (value, member name) tuples, e.g. for argparse/Django choices.
        return tuple((x.value, x.name) for x in cls)

    def __str__(self):
        # Serialize to the raw string value.
        return self.value


def sort(images, sorting_method=SortingMethod.LEXICOGRAPHICAL, func=None):
    """Return `images` ordered according to `sorting_method`.

    Args:
        images: sequence of items (typically file paths) to order.
        sorting_method: one of the SortingMethod members.
        func: optional key function, used by the lexicographical and
            natural orderings only.

    Returns:
        The ordered sequence. PREDEFINED returns `images` unchanged.

    Raises:
        NotImplementedError: for an unknown sorting method.
    """
    if sorting_method == SortingMethod.LEXICOGRAPHICAL:
        return sorted(images, key=func)
    elif sorting_method == SortingMethod.NATURAL:
        return os_sorted(images, key=func)
    elif sorting_method == SortingMethod.PREDEFINED:
        return images
    elif sorting_method == SortingMethod.RANDOM:
        # Shuffle a copy: random.shuffle works in place, and mutating the
        # caller's list (or failing on a tuple) would be a surprising side
        # effect — every branch now leaves the input untouched.
        shuffled = list(images)
        shuffle(shuffled)
        return shuffled
    else:
        raise NotImplementedError()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册