Fix task mode in tasks restored from backup (#6216)

- Fixes the problem with the task mode described in "CVAT corrupts annotations when exported with CVAT for video" (#5668)
- Fixes the problem with backup import failing because of an existing manifest file, described in "Cannot export dataset (KeyError: 'outside') / can't import backup from older version" (#5971)
- Cleaned test assets (removed extra directories from nonexistent tasks and jobs)

Fix task mode in tasks restored from backup (#6216)
- Fixes the problem with the task mode described in "CVAT corrupts annotations when exported with CVAT for video" (#5668)
- Fixes the problem with backup import failing because of an existing manifest file, described in "Cannot export dataset (KeyError: 'outside') / can't import backup from older version" (#5971)
- Cleaned test assets (removed extra directories from nonexistent tasks and jobs)
38df1cfe · Maxim Zhiltsov · GitHub · 24013a68 · 38df1cfe · 38df1cfe
4 changed files
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 - Issues with running serverless models for EXIF-rotated images. (<https://github.com/opencv/cvat/pull/6275/>)
 - File uploading issues when using https configuration. (<https://github.com/opencv/cvat/pull/6308>)
+- The problem with manifest file in tasks restored from backup (<https://github.com/opencv/cvat/issues/5971>)
+- The problem with task mode in a task restored from backup (<https://github.com/opencv/cvat/issues/5668>)

 ### Security
 - TDB
@@ -56,6 +58,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Broken logging in the TransT serverless function
  (<https://github.com/opencv/cvat/pull/6290>)

+### Security
+- TDB
+
 ## \[2.4.5] - 2023-06-02
 ### Added
 - Integrated support for sharepoint and cloud storage files, along with

--- a/cvat/apps/engine/task.py
+++ b/cvat/apps/engine/task.py
@@ -313,6 +313,7 @@ def _validate_manifest(
    db_cloud_storage: Optional[Any],
    data_storage_method: str,
    data_sorting_method: str,
+    isBackupRestore: bool,
 ) -> Optional[str]:
    if manifests:
        if len(manifests) != 1:
@@ -331,15 +332,21 @@ def _validate_manifest(
        if is_manifest(full_manifest_path):
            if not (
                data_sorting_method == models.SortingMethod.PREDEFINED or
-                data_storage_method == models.StorageMethodChoice.CACHE and settings.USE_CACHE
+                (settings.USE_CACHE and data_storage_method == models.StorageMethodChoice.CACHE) or
+                isBackupRestore
            ):
+                cache_disabled_message = ""
                if data_storage_method == models.StorageMethodChoice.CACHE and not settings.USE_CACHE:
-                    slogger.glob.warning("This server doesn't allow to use cache for data. "
-                        "Please turn 'use cache' off and try to recreate the task")
+                    cache_disabled_message = (
+                        "This server doesn't allow to use cache for data. "
+                        "Please turn 'use cache' off and try to recreate the task"
+                    )
+                    slogger.glob.warning(cache_disabled_message)

                raise ValidationError(
                    "A manifest file can only be used with the 'use cache' option "
-                    "or when the 'sorting_method' == 'predefined'"
+                    "or when 'sorting_method' is 'predefined'" + \
+                    (". " + cache_disabled_message if cache_disabled_message else "")
                )
            return manifest_file

@@ -547,6 +554,7 @@ def _create_thread(
        db_cloud_storage=db_data.cloud_storage if is_data_in_cloud else None,
        data_storage_method=db_data.storage_method,
        data_sorting_method=data['sorting_method'],
+        isBackupRestore=isBackupRestore,
    )

    manifest = None

--- a/tests/python/rest_api/test_tasks.py
+++ b/tests/python/rest_api/test_tasks.py
@@ -1527,6 +1527,118 @@ class TestWorkWithTask:
                assert image_name in ex.body


+class TestTaskBackups:
+    def _make_client(self) -> Client:
+        return Client(BASE_URL, config=Config(status_check_period=0.01))
+
+    @pytest.fixture(autouse=True)
+    def setup(self, restore_db_per_function, restore_cvat_data, tmp_path: Path, admin_user: str):
+        self.tmp_dir = tmp_path
+
+        self.client = self._make_client()
+        self.user = admin_user
+
+        with self.client:
+            self.client.login((self.user, USER_PASS))
+
+    @pytest.mark.parametrize("mode", ["annotation", "interpolation"])
+    def test_can_export_backup(self, tasks, mode):
+        task_id = next(t for t in tasks if t["mode"] == mode)["id"]
+        task = self.client.tasks.retrieve(task_id)
+
+        filename = self.tmp_dir / f"task_{task.id}_backup.zip"
+        task.download_backup(filename)
+
+        assert filename.is_file()
+        assert filename.stat().st_size > 0
+
+    @pytest.mark.parametrize("mode", ["annotation", "interpolation"])
+    def test_can_import_backup(self, tasks, mode):
+        task_json = next(t for t in tasks if t["mode"] == mode)
+        self._test_can_restore_backup_task(task_json["id"])
+
+    @pytest.mark.parametrize("mode", ["annotation", "interpolation"])
+    def test_can_import_backup_for_task_in_nondefault_state(self, tasks, mode):
+        # Reproduces the problem with empty 'mode' in a restored task,
+        # described in the reproduction steps https://github.com/opencv/cvat/issues/5668
+
+        task_json = next(t for t in tasks if t["mode"] == mode and t["jobs"]["count"])
+
+        task = self.client.tasks.retrieve(task_json["id"])
+        jobs = task.get_jobs()
+        for j in jobs:
+            j.update({"stage": "validation"})
+
+        self._test_can_restore_backup_task(task_json["id"])
+
+    def _test_can_restore_backup_task(self, task_id: int):
+        task = self.client.tasks.retrieve(task_id)
+        (_, response) = self.client.api_client.tasks_api.retrieve(task_id)
+        task_json = json.loads(response.data)
+
+        filename = self.tmp_dir / f"task_{task.id}_backup.zip"
+        task.download_backup(filename)
+
+        restored_task = self.client.tasks.create_from_backup(filename)
+
+        old_jobs = task.get_jobs()
+        new_jobs = restored_task.get_jobs()
+        assert len(old_jobs) == len(new_jobs)
+
+        for old_job, new_job in zip(old_jobs, new_jobs):
+            assert old_job.status == new_job.status
+            assert old_job.start_frame == new_job.start_frame
+            assert old_job.stop_frame == new_job.stop_frame
+
+        (_, response) = self.client.api_client.tasks_api.retrieve(restored_task.id)
+        restored_task_json = json.loads(response.data)
+
+        assert restored_task_json["assignee"] is None
+        assert restored_task_json["owner"]["username"] == self.user
+        assert restored_task_json["id"] != task_json["id"]
+        assert restored_task_json["data"] != task_json["data"]
+        assert restored_task_json["organization"] is None
+        assert restored_task_json["data_compressed_chunk_type"] in ["imageset", "video"]
+        if task_json["jobs"]["count"] == 1:
+            assert restored_task_json["overlap"] == 0
+        else:
+            assert restored_task_json["overlap"] == task_json["overlap"]
+        assert restored_task_json["jobs"]["completed"] == 0
+        assert restored_task_json["jobs"]["validation"] == 0
+        assert restored_task_json["source_storage"] is None
+        assert restored_task_json["target_storage"] is None
+        assert restored_task_json["project_id"] is None
+
+        assert (
+            DeepDiff(
+                task_json,
+                restored_task_json,
+                ignore_order=True,
+                exclude_regex_paths=[
+                    r"root\['id'\]",  # id, must be different
+                    r"root\['created_date'\]",  # must be different
+                    r"root\['updated_date'\]",  # must be different
+                    r"root\['assignee'\]",  # id, depends on the situation
+                    r"root\['owner'\]",  # id, depends on the situation
+                    r"root\['data'\]",  # id, must be different
+                    r"root\['organization'\]",  # depends on the task setup
+                    r"root\['project_id'\]",  # should be dropped
+                    r"root(\['.*'\])*\['url'\]",  # depends on the task id
+                    r"root\['data_compressed_chunk_type'\]",  # depends on the server configuration
+                    r"root\['source_storage'\]",  # should be dropped
+                    r"root\['target_storage'\]",  # should be dropped
+                    r"root\['jobs'\]\['completed'\]",  # job statuses should be renewed
+                    r"root\['jobs'\]\['validation'\]",  # job statuses should be renewed
+                    # depends on the actual job configuration,
+                    # unlike to what is obtained from the regular task creation,
+                    # where the requested number is recorded
+                    r"root\['overlap'\]",
+                ],
+            )
+            == {}
+        )
+
+
 @pytest.mark.usefixtures("restore_db_per_function")
 class TestWorkWithGtJobs:
    def test_normal_and_gt_job_annotations_are_not_merged(

--- a/tests/python/shared/assets/cvat_db/cvat_data.tar.bz2
+++ b/tests/python/shared/assets/cvat_db/cvat_data.tar.bz2