未验证 提交 68fbcdec 编写于 作者: M Maria Khrustaleva 提交者: GitHub

Added sorting methods (#3937)

上级 2ed9f9cd
......@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Google Cloud Storage support in UI (<https://github.com/openvinotoolkit/cvat/pull/3919>)
- Add project tasks paginations (<https://github.com/openvinotoolkit/cvat/pull/3910>)
- Add remove issue button (<https://github.com/openvinotoolkit/cvat/pull/3952>)
- Data sorting option (<https://github.com/openvinotoolkit/cvat/pull/3937>)
- Options to change font size & position of text labels on the canvas (<https://github.com/openvinotoolkit/cvat/pull/3972>)
- Add "tag" return type for automatic annotation in Nuclio (<https://github.com/openvinotoolkit/cvat/pull/3896>)
......
{
"name": "cvat-core",
"version": "3.20.1",
"version": "3.21.0",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "cvat-core",
"version": "3.20.1",
"version": "3.21.0",
"license": "MIT",
"dependencies": {
"axios": "^0.21.4",
......
{
"name": "cvat-core",
"version": "3.20.1",
"version": "3.21.0",
"description": "Part of Computer Vision Tool which presents an interface for client-side integration",
"main": "babel.config.js",
"scripts": {
......
......@@ -367,6 +367,24 @@
KEY_FILE_PATH: 'KEY_FILE_PATH',
});
/**
* Sorting methods
* @enum {string}
* @name SortingMethod
* @memberof module:API.cvat.enums
* @property {string} LEXICOGRAPHICAL 'lexicographical'
* @property {string} NATURAL 'natural'
* @property {string} PREDEFINED 'predefined'
* @property {string} RANDOM 'random'
* @readonly
*/
// Enum of image sorting methods; each member name maps to the lowercase
// string value expected by the server API. Frozen so it cannot be mutated.
const SortingMethod = Object.freeze(
    Object.fromEntries(
        ['LEXICOGRAPHICAL', 'NATURAL', 'PREDEFINED', 'RANDOM'].map((name) => [name, name.toLowerCase()]),
    ),
);
module.exports = {
ShareFileType,
TaskStatus,
......@@ -384,5 +402,6 @@
DimensionType,
CloudStorageProviderType,
CloudStorageCredentialsType,
SortingMethod,
};
})();
......@@ -1017,6 +1017,7 @@
copy_data: undefined,
dimension: undefined,
cloud_storage_id: undefined,
sorting_method: undefined,
};
const updatedFields = new FieldUpdateTrigger({
......@@ -1549,6 +1550,16 @@
cloudStorageId: {
get: () => data.cloud_storage_id,
},
sortingMethod: {
/**
* @name sortingMethod
* @type {module:API.cvat.enums.SortingMethod}
* @memberof module:API.cvat.classes.Task
* @instance
* @readonly
*/
get: () => data.sorting_method,
},
_internalData: {
get: () => data,
},
......@@ -2061,6 +2072,7 @@
image_quality: this.imageQuality,
use_zip_chunks: this.useZipChunks,
use_cache: this.useCache,
sorting_method: this.sortingMethod,
};
if (typeof this.startFrame !== 'undefined') {
......
{
"name": "cvat-ui",
"version": "1.28.1",
"version": "1.28.2",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "cvat-ui",
"version": "1.28.1",
"version": "1.28.2",
"license": "MIT",
"dependencies": {
"@ant-design/icons": "^4.6.3",
......
{
"name": "cvat-ui",
"version": "1.28.1",
"version": "1.28.2",
"description": "CVAT single-page application",
"main": "src/index.tsx",
"scripts": {
......
......@@ -353,6 +353,7 @@ export function createTaskAsync(data: any): ThunkAction<Promise<void>, {}, {}, A
image_quality: 70,
use_zip_chunks: data.advanced.useZipChunks,
use_cache: data.advanced.useCache,
sorting_method: data.advanced.sortingMethod,
};
if (data.projectId) {
......
......@@ -7,6 +7,7 @@ import { Row, Col } from 'antd/lib/grid';
import { PercentageOutlined } from '@ant-design/icons';
import Input from 'antd/lib/input';
import Select from 'antd/lib/select';
import Radio from 'antd/lib/radio';
import Checkbox from 'antd/lib/checkbox';
import Form, { FormInstance, RuleObject, RuleRender } from 'antd/lib/form';
import Text from 'antd/lib/typography/Text';
......@@ -16,6 +17,13 @@ import patterns from 'utils/validation-patterns';
const { Option } = Select;
// Image sorting methods offered in the task-creation form. The string values
// must match the SortingMethod enum exposed by cvat-core and the server-side
// models, since they are sent as-is in the `sorting_method` field.
export enum SortingMethod {
    LEXICOGRAPHICAL = 'lexicographical',
    NATURAL = 'natural',
    PREDEFINED = 'predefined',
    RANDOM = 'random',
}
export interface AdvancedConfiguration {
bugTracker?: string;
imageQuality?: number;
......@@ -31,6 +39,7 @@ export interface AdvancedConfiguration {
dataChunkSize?: number;
useCache: boolean;
copyData?: boolean;
sortingMethod: SortingMethod;
}
const initialValues: AdvancedConfiguration = {
......@@ -39,6 +48,7 @@ const initialValues: AdvancedConfiguration = {
useZipChunks: true,
useCache: true,
copyData: false,
sortingMethod: SortingMethod.LEXICOGRAPHICAL,
};
interface Props {
......@@ -178,6 +188,33 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
);
}
private renderSortingMethodRadio(): JSX.Element {
    // Pairs of (form value, visible caption), in display order.
    const radioOptions: [SortingMethod, string][] = [
        [SortingMethod.LEXICOGRAPHICAL, 'Lexicographical'],
        [SortingMethod.NATURAL, 'Natural'],
        [SortingMethod.PREDEFINED, 'Predefined'],
        [SortingMethod.RANDOM, 'Random'],
    ];

    return (
        <Form.Item
            label='Sorting method'
            name='sortingMethod'
            rules={[
                {
                    required: true,
                    message: 'The field is required.',
                },
            ]}
            help='Specify how to sort images. It is not relevant for videos.'
        >
            <Radio.Group>
                {radioOptions.map(([value, label]) => (
                    <Radio value={value} key={value}>
                        {label}
                    </Radio>
                ))}
            </Radio.Group>
        </Form.Item>
    );
}
private renderImageQuality(): JSX.Element {
return (
<CVATTooltip title='Defines images compression level'>
......@@ -290,8 +327,7 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
>
<Select style={{ width: '100%' }} initialValue='CVAT for video 1.1'>
{
dumpers.map((dumper: any) =>
<Option value={dumper.name}>{dumper.name}</Option>)
dumpers.map((dumper: any) => <Option value={dumper.name}>{dumper.name}</Option>)
}
</Select>
</Form.Item>
......@@ -384,6 +420,9 @@ class AdvancedConfigurationForm extends React.PureComponent<Props> {
const { installedGit, activeFileManagerTab } = this.props;
return (
<Form initialValues={initialValues} ref={this.formRef} layout='vertical'>
<Row>
<Col>{this.renderSortingMethodRadio()}</Col>
</Row>
{activeFileManagerTab === 'share' ? (
<Row>
<Col>{this.renderCopyDataChechbox()}</Col>
......
......@@ -20,7 +20,7 @@ import { Files } from 'components/file-manager/file-manager';
import BasicConfigurationForm, { BaseConfiguration } from './basic-configuration-form';
import ProjectSearchField from './project-search-field';
import ProjectSubsetField from './project-subset-field';
import AdvancedConfigurationForm, { AdvancedConfiguration } from './advanced-configuration-form';
import AdvancedConfigurationForm, { AdvancedConfiguration, SortingMethod } from './advanced-configuration-form';
export interface CreateTaskData {
projectId: number | null;
......@@ -54,6 +54,7 @@ const defaultState = {
lfs: false,
useZipChunks: true,
useCache: true,
sortingMethod: SortingMethod.LEXICOGRAPHICAL,
},
labels: [],
files: {
......
......@@ -65,6 +65,7 @@ class _TaskBackupBase():
'chunk_type',
'storage_method',
'storage',
'sorting_method',
}
self._prepare_meta(allowed_fields, data)
......
......@@ -14,11 +14,13 @@ from contextlib import closing
import av
import numpy as np
from natsort import os_sorted
from pyunpack import Archive
from PIL import Image, ImageFile
from random import shuffle
import open3d as o3d
from cvat.apps.engine.utils import rotate_image
from cvat.apps.engine.models import DimensionType
from cvat.apps.engine.models import DimensionType, SortingMethod
# fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web
# see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file
......@@ -47,9 +49,22 @@ def files_to_ignore(directory):
return True
return False
def sort(images, sorting_method=SortingMethod.LEXICOGRAPHICAL, func=None):
    """Return `images` ordered according to `sorting_method`.

    Args:
        images: sequence of items (typically file paths) to order.
        sorting_method: one of the SortingMethod members.
        func: optional key function, used by the lexicographical and
            natural orderings only.

    Returns:
        The ordered sequence. PREDEFINED returns `images` unchanged.

    Raises:
        NotImplementedError: for an unknown sorting method.
    """
    if sorting_method == SortingMethod.LEXICOGRAPHICAL:
        return sorted(images, key=func)
    elif sorting_method == SortingMethod.NATURAL:
        return os_sorted(images, key=func)
    elif sorting_method == SortingMethod.PREDEFINED:
        return images
    elif sorting_method == SortingMethod.RANDOM:
        # Shuffle a copy: random.shuffle works in place, and mutating the
        # caller's list (or failing on a tuple) would be a surprising side
        # effect — every branch now leaves the input untouched.
        shuffled = list(images)
        shuffle(shuffled)
        return shuffled
    else:
        raise NotImplementedError()
class IMediaReader(ABC):
def __init__(self, source_path, step, start, stop, dimension):
self._source_path = sorted(source_path)
self._source_path = source_path
self._step = step
self._start = start
self._stop = stop
......@@ -90,7 +105,13 @@ class IMediaReader(ABC):
return range(self._start, self._stop, self._step)
class ImageListReader(IMediaReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D):
def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
if not source_path:
raise Exception('No image found')
......@@ -102,13 +123,15 @@ class ImageListReader(IMediaReader):
assert stop > start
super().__init__(
source_path=source_path,
source_path=sort(source_path, sorting_method),
step=step,
start=start,
stop=stop,
dimension=dimension
)
self._sorting_method = sorting_method
def __iter__(self):
for i in range(self._start, self._stop, self._step):
yield (self.get_image(i), self.get_path(i), i)
......@@ -121,7 +144,8 @@ class ImageListReader(IMediaReader):
step=self._step,
start=self._start,
stop=self._stop,
dimension=self._dimension
dimension=self._dimension,
sorting_method=self._sorting_method
)
def get_path(self, i):
......@@ -154,7 +178,8 @@ class ImageListReader(IMediaReader):
source_path=source_files,
step=step,
start=start,
stop=stop
stop=stop,
sorting_method=self._sorting_method,
)
self._dimension = dimension
......@@ -163,7 +188,13 @@ class ImageListReader(IMediaReader):
return [self.get_path(idx) for idx, _ in enumerate(self._source_path)]
class DirectoryReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D):
def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
image_paths = []
for source in source_path:
for root, _, files in os.walk(source):
......@@ -176,10 +207,17 @@ class DirectoryReader(ImageListReader):
start=start,
stop=stop,
dimension=dimension,
sorting_method=sorting_method,
)
class ArchiveReader(DirectoryReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D):
def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
self._archive_source = source_path[0]
extract_dir = source_path[1] if len(source_path) > 1 else os.path.dirname(source_path[0])
Archive(self._archive_source).extractall(extract_dir)
......@@ -190,11 +228,18 @@ class ArchiveReader(DirectoryReader):
step=step,
start=start,
stop=stop,
dimension=dimension
dimension=dimension,
sorting_method=sorting_method,
)
class PdfReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D):
def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
if not source_path:
raise Exception('No PDF found')
......@@ -223,14 +268,26 @@ class PdfReader(ImageListReader):
start=start,
stop=stop,
dimension=dimension,
sorting_method=sorting_method,
)
class ZipReader(ImageListReader):
def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D):
def __init__(self,
source_path,
step=1,
start=0,
stop=None,
dimension=DimensionType.DIM_2D,
sorting_method=SortingMethod.LEXICOGRAPHICAL):
self._zip_source = zipfile.ZipFile(source_path[0], mode='r')
self.extract_dir = source_path[1] if len(source_path) > 1 else None
file_list = [f for f in self._zip_source.namelist() if files_to_ignore(f) and get_mime(f) == 'image']
super().__init__(file_list, step=step, start=start, stop=stop, dimension=dimension)
super().__init__(file_list,
step=step,
start=start,
stop=stop,
dimension=dimension,
sorting_method=sorting_method)
def __del__(self):
self._zip_source.close()
......
# Generated by Django 3.1.13 on 2021-12-03 08:06
import cvat.apps.engine.models
from django.db import migrations, models
class Migration(migrations.Migration):
    """Adds the Data.sorting_method column: how a task's images are ordered."""

    dependencies = [
        ('engine', '0044_auto_20211123_0824'),
    ]

    operations = [
        migrations.AddField(
            model_name='data',
            name='sorting_method',
            # Stored as the lowercase string value. Defaults to lexicographical
            # ordering, which matches how media was sorted before this field
            # existed, so pre-existing tasks keep their frame order.
            field=models.CharField(choices=[('lexicographical', 'LEXICOGRAPHICAL'), ('natural', 'NATURAL'), ('predefined', 'PREDEFINED'), ('random', 'RANDOM')], default=cvat.apps.engine.models.SortingMethod['LEXICOGRAPHICAL'], max_length=15),
        ),
    ]
......@@ -81,6 +81,19 @@ class StorageChoice(str, Enum):
def __str__(self):
return self.value
class SortingMethod(str, Enum):
    """Ways to order the images of a task's dataset."""
    LEXICOGRAPHICAL = 'lexicographical'
    NATURAL = 'natural'
    PREDEFINED = 'predefined'
    RANDOM = 'random'

    @classmethod
    def choices(cls):
        # (db value, member name) tuples for Django's `choices=` argument.
        return tuple((x.value, x.name) for x in cls)

    def __str__(self):
        # Serialize to the raw string value (used when rendering/storing).
        return self.value
class Data(models.Model):
chunk_size = models.PositiveIntegerField(null=True)
size = models.PositiveIntegerField(default=0)
......@@ -95,6 +108,7 @@ class Data(models.Model):
storage_method = models.CharField(max_length=15, choices=StorageMethodChoice.choices(), default=StorageMethodChoice.FILE_SYSTEM)
storage = models.CharField(max_length=15, choices=StorageChoice.choices(), default=StorageChoice.LOCAL)
cloud_storage = models.ForeignKey('CloudStorage', on_delete=models.SET_NULL, null=True, related_name='data')
sorting_method = models.CharField(max_length=15, choices=SortingMethod.choices(), default=SortingMethod.LEXICOGRAPHICAL)
class Meta:
default_permissions = ()
......
......@@ -281,7 +281,7 @@ class DataSerializer(serializers.ModelSerializer):
model = models.Data
fields = ('chunk_size', 'size', 'image_quality', 'start_frame', 'stop_frame', 'frame_filter',
'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'remote_files', 'use_zip_chunks',
'cloud_storage_id', 'use_cache', 'copy_data', 'storage_method', 'storage')
'cloud_storage_id', 'use_cache', 'copy_data', 'storage_method', 'storage', 'sorting_method')
# pylint: disable=no-self-use
def validate_frame_filter(self, value):
......@@ -308,9 +308,9 @@ class DataSerializer(serializers.ModelSerializer):
client_files = validated_data.pop('client_files')
server_files = validated_data.pop('server_files')
remote_files = validated_data.pop('remote_files')
validated_data.pop('use_zip_chunks')
validated_data.pop('use_cache')
validated_data.pop('copy_data')
for extra_key in { 'use_zip_chunks', 'use_cache', 'copy_data' }:
validated_data.pop(extra_key)
db_data = models.Data.objects.create(**validated_data)
data_path = db_data.get_data_dirname()
......
......@@ -22,7 +22,7 @@ from django.db import transaction
from cvat.apps.engine import models
from cvat.apps.engine.log import slogger
from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter,
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime)
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort)
from cvat.apps.engine.utils import av_scan_paths
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
from utils.dataset_manifest.core import VideoManifestValidator
......@@ -123,15 +123,18 @@ def _count_files(data, manifest_file=None):
raise ValueError("Bad file path: " + path)
server_files.append(path)
server_files.sort(reverse=True)
sorted_server_files = sorted(server_files, reverse=True)
# The idea of the code is trivial. After sort we will have files in the
# following order: 'a/b/c/d/2.txt', 'a/b/c/d/1.txt', 'a/b/c/d', 'a/b/c'
# Let's keep all items which aren't substrings of the previous item. In
# the example above only 2.txt and 1.txt files will be in the final list.
# Also need to correctly handle 'a/b/c0', 'a/b/c' case.
data['server_files'] = [v[1] for v in zip([""] + server_files, server_files)
without_extra_dirs = [v[1] for v in zip([""] + sorted_server_files, sorted_server_files)
if not os.path.dirname(v[0]).startswith(v[1])]
# we need to keep the original sequence of files
data['server_files'] = [f for f in server_files if f in without_extra_dirs]
def count_files(file_mapping, counter):
for rel_path, full_path in file_mapping.items():
mime = get_mime(full_path)
......@@ -141,7 +144,7 @@ def _count_files(data, manifest_file=None):
manifest_file.append(rel_path)
else:
slogger.glob.warn("Skip '{}' file (its mime type doesn't "
"correspond to a video or an image file)".format(full_path))
"correspond to supported MIME file type)".format(full_path))
counter = { media_type: [] for media_type in MEDIA_TYPES.keys() }
......@@ -213,6 +216,7 @@ def _download_data(urls, upload_dir):
def _get_manifest_frame_indexer(start_frame=0, frame_step=1):
return lambda frame_id: start_frame + frame_id * frame_step
@transaction.atomic
def _create_thread(tid, data, isImport=False):
slogger.glob.info("create task #{}".format(tid))
......@@ -222,15 +226,13 @@ def _create_thread(tid, data, isImport=False):
upload_dir = db_data.get_upload_dirname()
if data['remote_files']:
if db_data.storage != models.StorageChoice.CLOUD_STORAGE:
data['remote_files'] = _download_data(data['remote_files'], upload_dir)
data['remote_files'] = _download_data(data['remote_files'], upload_dir)
manifest_file = []
media = _count_files(data, manifest_file)
media, task_mode = _validate_data(media, manifest_file)
if manifest_file:
assert settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE, \
"File with meta information can be uploaded if 'Use cache' option is also selected"
if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")
if data['server_files']:
if db_data.storage == models.StorageChoice.LOCAL:
......@@ -252,19 +254,22 @@ def _create_thread(tid, data, isImport=False):
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
first_sorted_media_image = sorted(media['image'])[0]
sorted_media = sort(media['image'], data['sorting_method'])
first_sorted_media_image = sorted_media[0]
cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))
# prepare task manifest file from cloud storage manifest file
# NOTE we should create manifest before defining chunk_size
# FIXME in the future when will be implemented archive support
manifest = ImageManifestManager(db_data.get_manifest_path())
cloud_storage_manifest = ImageManifestManager(
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]),
db_data.cloud_storage.get_storage_dirname()
)
cloud_storage_manifest.set_index()
media_files = sorted(media['image'])
content = cloud_storage_manifest.get_subset(media_files)
manifest.create(content)
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(sorted_content)
av_scan_paths(upload_dir)
......@@ -292,24 +297,48 @@ def _create_thread(tid, data, isImport=False):
if media_files:
if extractor is not None:
raise Exception('Combined data types are not supported')
source_paths=[os.path.join(upload_dir, f) for f in media_files]
if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
source_paths.append(db_data.get_upload_dirname())
upload_dir = db_data.get_upload_dirname()
db_data.storage = models.StorageChoice.LOCAL
if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step())
db_data.start_frame = 0
data['stop_frame'] = None
db_data.frame_filter = ''
if isImport and media_type != 'video' and db_data.storage_method == models.StorageMethodChoice.CACHE:
# we should sort media_files according to the manifest content sequence
manifest = ImageManifestManager(db_data.get_manifest_path())
manifest.set_index()
sorted_media_files = []
for idx in range(len(media_files)):
properties = manifest[manifest_index(idx)]
image_name = properties.get('name', None)
image_extension = properties.get('extension', None)
full_image_path = f"{image_name}{image_extension}" if image_name and image_extension else None
if full_image_path and full_image_path in media_files:
sorted_media_files.append(full_image_path)
media_files = sorted_media_files.copy()
del sorted_media_files
data['sorting_method'] = models.SortingMethod.PREDEFINED
source_paths=[os.path.join(upload_dir, f) for f in media_files]
if manifest_file and not isImport and data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
raise Exception("It isn't supported to upload manifest file and use random sorting")
if isImport and db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM and \
data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
raise Exception("It isn't supported to import the task that was created without cache but with random/predefined sorting")
extractor = MEDIA_TYPES[media_type]['extractor'](
source_path=source_paths,
step=db_data.get_frame_step(),
start=db_data.start_frame,
stop=data['stop_frame'],
)
if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
source_paths.append(db_data.get_upload_dirname())
upload_dir = db_data.get_upload_dirname()
db_data.storage = models.StorageChoice.LOCAL
details = {
'source_path': source_paths,
'step': db_data.get_frame_step(),
'start': db_data.start_frame,
'stop': data['stop_frame'],
}
if media_type != 'video':
details['sorting_method'] = data['sorting_method']
extractor = MEDIA_TYPES[media_type]['extractor'](**details)
validate_dimension = ValidateDimension()
if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
......@@ -474,8 +503,12 @@ def _create_thread(tid, data, isImport=False):
chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
img_sizes = []
for _, frame_id in chunk_paths:
for chunk_path, frame_id in chunk_paths:
properties = manifest[manifest_index(frame_id)]
# check mapping
if not chunk_path.endswith(f"{properties['name']}{properties['extension']}"):
raise Exception('Incorrect file mapping to manifest content')
if db_task.dimension == models.DimensionType.DIM_2D:
resolution = (properties['width'], properties['height'])
else:
......
......@@ -30,9 +30,9 @@ from rest_framework.test import APIClient, APITestCase
from datumaro.util.test_utils import TestDir
from cvat.apps.engine.models import (AttributeSpec, AttributeType, Data, Job, Project,
Segment, StatusChoice, Task, Label, StorageMethodChoice, StorageChoice)
from cvat.apps.engine.media_extractors import ValidateDimension
from cvat.apps.engine.models import DimensionType
Segment, StatusChoice, Task, Label, StorageMethodChoice, StorageChoice, DimensionType,
SortingMethod)
from cvat.apps.engine.media_extractors import ValidateDimension, sort
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager
def create_db_users(cls):
......@@ -2169,17 +2169,29 @@ class TaskImportExportAPITestCase(APITestCase):
with open(path, "wb") as image:
image.write(data.read())
cls.media_data.append(
{
**{"image_quality": 75,
"copy_data": True,
"start_frame": 2,
"stop_frame": 9,
"frame_filter": "step=2",
},
**{"server_files[{}]".format(i): imagename_pattern.format(i) for i in range(image_count)},
}
)
data = {
"image_quality": 75,
"copy_data": True,
"start_frame": 2,
"stop_frame": 9,
"frame_filter": "step=2",
**{"server_files[{}]".format(i): imagename_pattern.format(i) for i in range(image_count)},
}
use_cache_data = {
**data,
'use_cache': True,
}
cls.media_data.append(data)
data['sorting_method'] = SortingMethod.NATURAL
cls.media_data.append(data)
cls.media_data.append(use_cache_data)
use_cache_data['sorting_method'] = SortingMethod.NATURAL
cls.media_data.append(use_cache_data)
use_cache_data['sorting_method'] = SortingMethod.RANDOM
cls.media_data.append(use_cache_data)
filename = "test_video_1.mp4"
path = os.path.join(settings.SHARE_ROOT, filename)
......@@ -2267,13 +2279,47 @@ class TaskImportExportAPITestCase(APITestCase):
}
)
data = {
"client_files[0]": generate_image_file("test_1.jpg")[1],
"client_files[1]": generate_image_file("test_2.jpg")[1],
"client_files[2]": generate_image_file("test_10.jpg")[1],
"client_files[3]": generate_image_file("test_3.jpg")[1],
"image_quality": 75,
}
use_cache_data = {
**data,
'use_cache': True,
}
cls.media_data.extend([
# image list local
# sorted data
# natural: test_1.jpg, test_2.jpg, test_3.jpg, test_10.jpg
{
"client_files[0]": generate_image_file("test_1.jpg")[1],
"client_files[1]": generate_image_file("test_2.jpg")[1],
"client_files[2]": generate_image_file("test_3.jpg")[1],
"image_quality": 75,
**use_cache_data,
'sorting_method': SortingMethod.NATURAL,
},
{
**data,
'sorting_method': SortingMethod.NATURAL,
},
# random
{
**use_cache_data,
'sorting_method': SortingMethod.RANDOM,
},
# predefined: test_1.jpg, test_2.jpg, test_10.jpg, test_3.jpg
{
**use_cache_data,
'sorting_method': SortingMethod.PREDEFINED,
},
# lexicographical: test_1.jpg, test_10.jpg, test_2.jpg, test_3.jpg
{
**use_cache_data,
'sorting_method': SortingMethod.LEXICOGRAPHICAL,
},
{
**data,
'sorting_method': SortingMethod.LEXICOGRAPHICAL,
},
# video local
{
......@@ -2576,7 +2622,7 @@ def generate_manifest_file(data_type, manifest_path, sources):
kwargs = {
'images': {
'sources': sources,
'is_sorted': False,
'sorting_method': SortingMethod.LEXICOGRAPHICAL,
},
'video': {
'media_file': sources[0],
......@@ -2633,6 +2679,13 @@ class TaskDataAPITestCase(APITestCase):
image.write(data.read())
cls._image_sizes[filename] = img_size
filename = "test_10.jpg"
path = os.path.join(settings.SHARE_ROOT, filename)
img_size, data = generate_image_file(filename)
with open(path, "wb") as image:
image.write(data.read())
cls._image_sizes[filename] = img_size
filename = os.path.join("data", "test_3.jpg")
path = os.path.join(settings.SHARE_ROOT, filename)
os.makedirs(os.path.dirname(path))
......@@ -2732,6 +2785,9 @@ class TaskDataAPITestCase(APITestCase):
path = os.path.join(settings.SHARE_ROOT, "test_3.jpg")
os.remove(path)
path = os.path.join(settings.SHARE_ROOT, "test_10.jpg")
os.remove(path)
path = os.path.join(settings.SHARE_ROOT, "data", "test_3.jpg")
os.remove(path)
......@@ -2892,9 +2948,9 @@ class TaskDataAPITestCase(APITestCase):
client_files = [img for key, img in data.items() if key.startswith("client_files")]
if server_files:
source_files = [os.path.join(settings.SHARE_ROOT, f) for f in sorted(server_files)]
source_files = [os.path.join(settings.SHARE_ROOT, f) for f in sort(server_files, data.get('sorting_method', SortingMethod.LEXICOGRAPHICAL))]
else:
source_files = [f for f in sorted(client_files, key=lambda e: e.name)]
source_files = [f for f in sort(client_files, data.get('sorting_method', SortingMethod.LEXICOGRAPHICAL), func=lambda e: e.name)]
source_images = []
for f in source_files:
......@@ -3128,7 +3184,7 @@ class TaskDataAPITestCase(APITestCase):
image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL)
task_spec = {
"name": "cached images task without copying #16",
"name": "cached images task with default sorting data and without copying #16",
"overlap": 0,
"segment_size": 0,
"labels": [
......@@ -3140,14 +3196,14 @@ class TaskDataAPITestCase(APITestCase):
task_data = {
"server_files[0]": "test_1.jpg",
"server_files[1]": "test_2.jpg",
"server_files[2]": "test_3.jpg",
"server_files[2]": "test_10.jpg",
"image_quality": 70,
"use_cache": True,
}
image_sizes = [
self._image_sizes[task_data["server_files[0]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[2]"]],
self._image_sizes[task_data["server_files[1]"]],
]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
......@@ -3381,6 +3437,44 @@ class TaskDataAPITestCase(APITestCase):
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL)
# test predefined sorting
task_spec.update([('name', 'task custom data sequence #28')])
task_data = {
"server_files[0]": "test_1.jpg",
"server_files[1]": "test_3.jpg",
"server_files[2]": "test_2.jpg",
"image_quality": 70,
"use_cache": True,
"sorting_method": SortingMethod.PREDEFINED
}
image_sizes = [
self._image_sizes[task_data["server_files[0]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[2]"]],
]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE)
# test a natural data sequence
task_spec.update([('name', 'task native data sequence #29')])
task_data = {
"server_files[0]": "test_10.jpg",
"server_files[1]": "test_2.jpg",
"server_files[2]": "test_1.jpg",
"image_quality": 70,
"use_cache": True,
"sorting_method": SortingMethod.NATURAL
}
image_sizes = [
self._image_sizes[task_data["server_files[2]"]],
self._image_sizes[task_data["server_files[1]"]],
self._image_sizes[task_data["server_files[0]"]],
]
self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE)
def test_api_v1_tasks_id_data_admin(self):
self._test_api_v1_tasks_id_data(self.admin)
......
......@@ -105,4 +105,4 @@ def parse_specific_attributes(specific_attributes):
return {
item.split('=')[0].strip(): item.split('=')[1].strip()
for item in specific_attributes.split('&')
} if specific_attributes else dict()
\ No newline at end of file
} if specific_attributes else dict()
......@@ -637,9 +637,8 @@ class TaskViewSet(auth.TaskGetQuerySetMixin, viewsets.ModelViewSet):
db_task.data = db_data
db_task.save()
data = {k:v for k, v in serializer.data.items()}
data['use_zip_chunks'] = serializer.validated_data['use_zip_chunks']
data['use_cache'] = serializer.validated_data['use_cache']
data['copy_data'] = serializer.validated_data['copy_data']
for extra_key in { 'use_zip_chunks', 'use_cache', 'copy_data' }:
data[extra_key] = serializer.validated_data[extra_key]
if data['use_cache']:
db_task.data.storage_method = StorageMethodChoice.CACHE
db_task.data.save(update_fields=['storage_method'])
......
......@@ -55,3 +55,4 @@ google-cloud-storage==1.42.0
# when pycocotools is installed by wheel in python 3.8+
datumaro==0.2.0 --no-binary=datumaro --no-binary=pycocotools
urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability
natsort==8.0.0
......@@ -125,7 +125,15 @@ To create a 3D task, you need to use the following directory structures:
## Advanced configuration
![](/images/image128_use_cache.jpg)
![](/images/image128.jpg)
### Sorting method
Option to sort the data. It is not relevant for videos.
For example, the sequence `2.jpeg, 10.jpeg, 1.jpeg` after sorting will be:
- `lexicographical`: 1.jpeg, 10.jpeg, 2.jpeg
- `natural`: 1.jpeg, 2.jpeg, 10.jpeg
- `predefined`: 2.jpeg, 10.jpeg, 1.jpeg
### Use zip chunks
......
此差异由.gitattributes 抑制。
......@@ -45,6 +45,8 @@ class CLI():
data['copy_data'] = kwargs.get('copy_data')
if 'use_cache' in kwargs:
data['use_cache'] = kwargs.get('use_cache')
if 'sorting_method' in kwargs:
data['sorting_method'] = kwargs.get('sorting_method')
response = self.session.post(url, data=data, files=files)
response.raise_for_status()
......
......@@ -208,6 +208,13 @@ task_create_parser.add_argument(
action='store_false',
help='''set the option to use the cache (default: %(default)s)'''
)
# CLI option selecting how uploaded images are ordered within the task;
# the accepted values mirror the server-side SortingMethod enum.
task_create_parser.add_argument(
    '--sorting-method',
    default='lexicographical',
    choices=['lexicographical', 'natural', 'predefined', 'random'],
    # fixed typo in the user-visible help text: "soring" -> "sorting"
    help='''data sorting method (default: %(default)s)'''
)
#######################################################################
# Delete
#######################################################################
......
......@@ -10,7 +10,7 @@ from contextlib import closing
from tempfile import NamedTemporaryFile
from PIL import Image
from .utils import md5_hash, rotate_image
from .utils import md5_hash, rotate_image, sort, SortingMethod
class VideoStreamReader:
def __init__(self, source_path, chunk_size, force):
......@@ -146,14 +146,14 @@ class DatasetImagesReader:
def __init__(self,
sources,
meta=None,
is_sorted=True,
sorting_method=SortingMethod.PREDEFINED,
use_image_hash=False,
start = 0,
step = 1,
stop = None,
*args,
**kwargs):
self._sources = sources if is_sorted else sorted(sources)
self._sources = sort(sources, sorting_method)
self._meta = meta
self._data_dir = kwargs.get('data_dir', None)
self._use_image_hash = use_image_hash
......@@ -601,11 +601,18 @@ class ImageManifestManager(_ManifestManager):
return (f"{image['name']}{image['extension']}" for _, image in self)
def get_subset(self, subset_names):
return ({
'name': f"{image['name']}",
'extension': f"{image['extension']}",
'width': image['width'],
'height': image['height'],
'meta': image['meta'],
'checksum': f"{image['checksum']}"
} for _, image in self if f"{image['name']}{image['extension']}" in subset_names)
index_list = []
subset = []
for _, image in self:
image_name = f"{image['name']}{image['extension']}"
if image_name in subset_names:
index_list.append(subset_names.index(image_name))
subset.append({
'name': f"{image['name']}",
'extension': f"{image['extension']}",
'width': image['width'],
'height': image['height'],
'meta': image['meta'],
'checksum': f"{image['checksum']}"
})
return index_list, subset
......@@ -17,6 +17,8 @@ def get_args():
'if by default the video does not meet the requirements and a manifest file is not prepared')
parser.add_argument('--output-dir',type=str, help='Directory where the manifest file will be saved',
default=os.getcwd())
parser.add_argument('--sorting', choices=['lexicographical', 'natural', 'predefined', 'random'],
type=str, default='lexicographical')
parser.add_argument('source', type=str, help='Source paths')
return parser.parse_args()
......@@ -63,7 +65,7 @@ def main():
try:
assert len(sources), 'A images was not found'
manifest = ImageManifestManager(manifest_path=manifest_directory)
manifest.link(sources=sources, meta=meta, is_sorted=False,
manifest.link(sources=sources, meta=meta, sorting_method=args.sorting,
use_image_hash=True, data_dir=data_dir)
manifest.create(_tqdm=tqdm)
except Exception as ex:
......
av==8.0.2 --no-binary=av
opencv-python-headless==4.4.0.42
Pillow==7.2.0
tqdm==4.58.0
\ No newline at end of file
tqdm==4.58.0
natsort==8.0.0
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
import os
import re
import hashlib
import mimetypes
import cv2 as cv
from av import VideoFrame
from enum import Enum
from natsort import os_sorted
from random import shuffle
def rotate_image(image, angle):
height, width = image.shape[:2]
......@@ -187,3 +191,29 @@ def detect_related_images(image_paths, root_path):
elif data_are_3d:
return _detect_related_images_3D(image_paths, root_path)
return {}
class SortingMethod(str, Enum):
    """Ways to order a sequence of image files in a dataset manifest."""
    LEXICOGRAPHICAL = 'lexicographical'
    NATURAL = 'natural'
    PREDEFINED = 'predefined'
    RANDOM = 'random'

    @classmethod
    def choices(cls):
        # (value, member name) tuples, e.g. for argparse/Django choices.
        return tuple((x.value, x.name) for x in cls)

    def __str__(self):
        # Serialize to the raw string value.
        return self.value


def sort(images, sorting_method=SortingMethod.LEXICOGRAPHICAL, func=None):
    """Return `images` ordered according to `sorting_method`.

    Args:
        images: sequence of items (typically file paths) to order.
        sorting_method: one of the SortingMethod members.
        func: optional key function, used by the lexicographical and
            natural orderings only.

    Returns:
        The ordered sequence. PREDEFINED returns `images` unchanged.

    Raises:
        NotImplementedError: for an unknown sorting method.
    """
    if sorting_method == SortingMethod.LEXICOGRAPHICAL:
        return sorted(images, key=func)
    elif sorting_method == SortingMethod.NATURAL:
        return os_sorted(images, key=func)
    elif sorting_method == SortingMethod.PREDEFINED:
        return images
    elif sorting_method == SortingMethod.RANDOM:
        # Shuffle a copy: random.shuffle works in place, and mutating the
        # caller's list (or failing on a tuple) would be a surprising side
        # effect — every branch now leaves the input untouched.
        shuffled = list(images)
        shuffle(shuffled)
        return shuffled
    else:
        raise NotImplementedError()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册