Added ability to correctly upload a video with a rotation record in the metadata (#2218)

* Added ability to correctly upload a video with a rotation record in the metadata
* fix sizes of rotated preview
* fix sizes of rotated frame
* Added tests for uploaded video with rotation record in metadata
* Used OpenCV instead of PIL
* Fixed tests
* Update CHANGELOG
* fix
* Moved function

Co-authored-by: Nikita Manovich <nikita.manovich@intel.com>

Added ability to correctly upload a video with a rotation record in the metadata (#2218)
* Added ability to correctly upload a video with a rotation record in the metadata
* fix sizes of rotated preview
* fix sizes of rotated frame
* Added tests for uploaded video with rotation record in metadata
* Used OpenCV instead of PIL
* Fixed tests
* Update CHANGELOG
* fix
* Moved function

Co-authored-by: Nikita Manovich <nikita.manovich@intel.com>
bbfa880d · Maria Khrustaleva · GitHub · fd767f95 · bbfa880d · bbfa880d
7 changed files
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [1.2.0] - Unreleased

 ### Added
-
 - Removed Z-Order flag from task creation process
 - Ability to login into CVAT-UI with token from api/v1/auth/login (<https://github.com/openvinotoolkit/cvat/pull/2234>)
 - Added layout grids toggling ('ctrl + alt + Enter')
@@ -29,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Ability to upload prepared meta information along with a video when creating a task (<https://github.com/openvinotoolkit/cvat/pull/2217>)
 - Optional chaining plugin for cvat-canvas and cvat-ui (<https://github.com/openvinotoolkit/cvat/pull/2249>)
 - MOTS png mask format support (<https://github.com/openvinotoolkit/cvat/pull/2198>)
+- Ability to correct upload video with a rotation record in the metadata (<https://github.com/openvinotoolkit/cvat/pull/2218>)

 ### Changed


--- a/cvat/apps/engine/media_extractors.py
+++ b/cvat/apps/engine/media_extractors.py
@@ -14,6 +14,7 @@ import av
 import numpy as np
 from pyunpack import Archive
 from PIL import Image, ImageFile
+from cvat.apps.engine.utils import rotate_image

 # fixes: "OSError:broken data stream" when executing line 72 while loading images downloaded from the web
 # see: https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file
@@ -228,6 +229,16 @@ class VideoReader(IMediaReader):
                for image in packet.decode():
                    frame_num += 1
                    if self._has_frame(frame_num - 1):
+                        if packet.stream.metadata.get('rotate'):
+                            old_image = image
+                            image = av.VideoFrame().from_ndarray(
+                                rotate_image(
+                                    image.to_ndarray(format='bgr24'),
+                                    360 - int(container.streams.video[0].metadata.get('rotate'))
+                                ),
+                                format ='bgr24'
+                            )
+                            image.pts = old_image.pts
                        yield (image, self._source_path[0], image.pts)

    def __iter__(self):
@@ -252,7 +263,15 @@ class VideoReader(IMediaReader):
        container = self._get_av_container()
        stream = container.streams.video[0]
        preview = next(container.decode(stream))
-        return self._get_preview(preview.to_image())
+        return self._get_preview(preview.to_image() if not stream.metadata.get('rotate') \
+            else av.VideoFrame().from_ndarray(
+                rotate_image(
+                    preview.to_ndarray(format='bgr24'),
+                    360 - int(container.streams.video[0].metadata.get('rotate'))
+                ),
+                format ='bgr24'
+            ).to_image()
+        )

    def get_image_size(self, i):
        image = (next(iter(self)))[0]

--- a/cvat/apps/engine/prepare.py
+++ b/cvat/apps/engine/prepare.py
@@ -6,6 +6,7 @@ import av
 from collections import OrderedDict
 import hashlib
 import os
+from cvat.apps.engine.utils import rotate_image

 class WorkWithVideo:
    def __init__(self, **kwargs):
@@ -24,7 +25,6 @@ class WorkWithVideo:
        video_stream.thread_type = 'AUTO'
        return video_stream

-
 class AnalyzeVideo(WorkWithVideo):
    def check_type_first_frame(self):
        container = self._open_video_container(self.source_path, mode='r')
@@ -76,7 +76,17 @@ class PrepareInfo(WorkWithVideo):

    @property
    def frame_sizes(self):
+        container = self._open_video_container(self.source_path, 'r')
        frame = next(iter(self.key_frames.values()))
+        if container.streams.video[0].metadata.get('rotate'):
+            frame = av.VideoFrame().from_ndarray(
+                rotate_image(
+                    frame.to_ndarray(format='bgr24'),
+                    360 - int(container.streams.video[0].metadata.get('rotate'))
+                ),
+                format ='bgr24'
+            )
+        self._close_video_container(container)
        return (frame.width, frame.height)

    def check_key_frame(self, container, video_stream, key_frame):
@@ -150,6 +160,14 @@ class PrepareInfo(WorkWithVideo):
                if frame_number < start_chunk_frame_number:
                    continue
                elif frame_number < end_chunk_frame_number and not ((frame_number - start_chunk_frame_number) % step):
+                    if video_stream.metadata.get('rotate'):
+                        frame = av.VideoFrame().from_ndarray(
+                            rotate_image(
+                                frame.to_ndarray(format='bgr24'),
+                                360 - int(container.streams.video[0].metadata.get('rotate'))
+                            ),
+                            format ='bgr24'
+                        )
                    yield frame
                elif (frame_number - start_chunk_frame_number) % step:
                    continue
@@ -177,6 +195,14 @@ class UploadedMeta(PrepareInfo):
        container.seek(offset=next(iter(self.key_frames.values())), stream=video_stream)
        for packet in container.demux(video_stream):
            for frame in packet.decode():
+                if video_stream.metadata.get('rotate'):
+                    frame = av.VideoFrame().from_ndarray(
+                        rotate_image(
+                            frame.to_ndarray(format='bgr24'),
+                            360 - int(container.streams.video[0].metadata.get('rotate'))
+                        ),
+                        format ='bgr24'
+                    )
                self._close_video_container(container)
                return (frame.width, frame.height)


--- a/cvat/apps/engine/task.py
+++ b/cvat/apps/engine/task.py
@@ -294,6 +294,7 @@ def _create_thread(tid, data):

    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
       for media_type, media_files in media.items():
+
            if not media_files:
                continue


--- a/cvat/apps/engine/tests/assets/test_rotated_90_video.mp4
+++ b/cvat/apps/engine/tests/assets/test_rotated_90_video.mp4
--- a/cvat/apps/engine/tests/test_rest_api.py
+++ b/cvat/apps/engine/tests/test_rest_api.py
@@ -1548,6 +1548,16 @@ class TaskDataAPITestCase(APITestCase):
            video.write(data.read())
        cls._image_sizes[filename] = img_sizes

+        filename = "test_rotated_90_video.mp4"
+        path = os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4')
+        container = av.open(path, 'r')
+        for frame in container.decode(video=0):
+            # pyav ignores rotation record in metadata when decoding frames
+            img_sizes = [(frame.height, frame.width)] * container.streams.video[0].frames
+            break
+        container.close()
+        cls._image_sizes[filename] = img_sizes
+
        filename = os.path.join("videos", "test_video_1.mp4")
        path = os.path.join(settings.SHARE_ROOT, filename)
        os.makedirs(os.path.dirname(path))
@@ -2003,7 +2013,7 @@ class TaskDataAPITestCase(APITestCase):
            os.path.join(settings.SHARE_ROOT, "videos")
        )
        task_spec = {
-            "name": "my video with meta info task #11",
+            "name": "my video with meta info task #13",
            "overlap": 0,
            "segment_size": 0,
            "labels": [
@@ -2022,6 +2032,47 @@ class TaskDataAPITestCase(APITestCase):
        self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.VIDEO,
                                            self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)

+        task_spec = {
+            "name": "my cached video task #14",
+            "overlap": 0,
+            "segment_size": 0,
+            "labels": [
+                {"name": "car"},
+                {"name": "person"},
+            ]
+        }
+
+        task_data = {
+            "client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'),
+            "image_quality": 70,
+            "use_zip_chunks": True
+        }
+
+        image_sizes = self._image_sizes['test_rotated_90_video.mp4']
+        self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
+            self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.FILE_SYSTEM)
+
+        task_spec = {
+            "name": "my video task #15",
+            "overlap": 0,
+            "segment_size": 0,
+            "labels": [
+                {"name": "car"},
+                {"name": "person"},
+            ]
+        }
+
+        task_data = {
+            "client_files[0]": open(os.path.join(os.path.dirname(__file__), 'assets', 'test_rotated_90_video.mp4'), 'rb'),
+            "image_quality": 70,
+            "use_cache": True,
+            "use_zip_chunks": True
+        }
+
+        image_sizes = self._image_sizes['test_rotated_90_video.mp4']
+        self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET,
+            self.ChunkType.VIDEO, image_sizes, StorageMethodChoice.CACHE)
+
    def test_api_v1_tasks_id_data_admin(self):
        self._test_api_v1_tasks_id_data(self.admin)


--- a/cvat/apps/engine/utils.py
+++ b/cvat/apps/engine/utils.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: MIT

 import ast
+import cv2 as cv
 from collections import namedtuple
 import importlib
 import sys
@@ -74,3 +75,16 @@ def av_scan_paths(*paths):
        res = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if res.returncode:
            raise ValidationError(res.stdout)
+
+def rotate_image(image, angle):
+    height, width = image.shape[:2]
+    image_center = (width/2, height/2)
+    matrix = cv.getRotationMatrix2D(image_center, angle, 1.)
+    abs_cos = abs(matrix[0,0])
+    abs_sin = abs(matrix[0,1])
+    bound_w = int(height * abs_sin + width * abs_cos)
+    bound_h = int(height * abs_cos + width * abs_sin)
+    matrix[0, 2] += bound_w/2 - image_center[0]
+    matrix[1, 2] += bound_h/2 - image_center[1]
+    matrix = cv.warpAffine(image, matrix, (bound_w, bound_h))
+    return matrix