From bceae228a79bfa149eda95403c4e0813d63b92aa Mon Sep 17 00:00:00 2001
From: Kirill Lakhov
Date: Tue, 22 Mar 2022 23:23:43 +0300
Subject: [PATCH] Tus for project dataset (#4485)

---
 CHANGELOG.md                  |  1 +
 cvat-core/package-lock.json   |  4 +-
 cvat-core/package.json        |  2 +-
 cvat-core/src/server-proxy.js | 83 +++++++++++++++++++++-------------
 cvat/apps/engine/models.py    |  3 ++
 cvat/apps/engine/views.py     | 85 ++++++++++++++++++++++++-----------
 cvat/apps/iam/permissions.py  |  2 +
 7 files changed, 115 insertions(+), 65 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d8dd16242..aa212063f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - Task annotations importing via chunk uploads ()
 - Advanced filtration and sorting for a list of tasks/projects/cloudstorages ()
+- Project dataset importing via chunk uploads ()
 
 ### Changed
 - Added missing geos dependency into Dockerfile ()

diff --git a/cvat-core/package-lock.json b/cvat-core/package-lock.json
index 7a1cfa402..43b2323ca 100644
--- a/cvat-core/package-lock.json
+++ b/cvat-core/package-lock.json
@@ -1,12 +1,12 @@
 {
     "name": "cvat-core",
-    "version": "5.0.0",
+    "version": "5.0.1",
     "lockfileVersion": 2,
     "requires": true,
     "packages": {
         "": {
             "name": "cvat-core",
-            "version": "5.0.0",
+            "version": "5.0.1",
             "license": "MIT",
             "dependencies": {
                 "axios": "^0.21.4",

diff --git a/cvat-core/package.json b/cvat-core/package.json
index ac402b144..55f6408e9 100644
--- a/cvat-core/package.json
+++ b/cvat-core/package.json
@@ -1,6 +1,6 @@
 {
     "name": "cvat-core",
-    "version": "5.0.0",
+    "version": "5.0.1",
     "description": "Part of Computer Vision Tool which presents an interface for client-side integration",
     "main": "babel.config.js",
     "scripts": {

diff --git a/cvat-core/src/server-proxy.js b/cvat-core/src/server-proxy.js
index f0a52b67a..3924f7bb7 100644
--- a/cvat-core/src/server-proxy.js
+++ b/cvat-core/src/server-proxy.js
@@ -75,7 +75,7 @@
                 onProgress(bytesUploaded) {
                     if (onUpdate && Number.isInteger(totalSentSize) && Number.isInteger(totalSize)) {
                         const currentUploadedSize = totalSentSize + bytesUploaded;
-                        const percentage = Math.round(currentUploadedSize / totalSize);
+                        const percentage = currentUploadedSize / totalSize;
                         onUpdate(percentage);
                     }
                 },
@@ -612,41 +612,63 @@
     }
 
     async function importDataset(id, format, file, onUpdate) {
-        const { backendAPI } = config;
+        const { backendAPI, origin } = config;
+        const params = {
+            ...enableOrganization(),
+            format,
+            filename: file.name,
+        };
+        const uploadConfig = {
+            chunkSize: config.uploadChunkSize * 1024 * 1024,
+            endpoint: `${origin}${backendAPI}/projects/${id}/dataset/`,
+            totalSentSize: 0,
+            totalSize: file.size,
+            onUpdate: (percentage) => {
+                onUpdate('The dataset is being uploaded to the server', percentage);
+            },
+        };
         const url = `${backendAPI}/projects/${id}/dataset`;
-        const formData = new FormData();
-        formData.append('dataset_file', file);
 
-        return new Promise((resolve, reject) => {
-            async function requestStatus() {
-                try {
-                    const response = await Axios.get(`${url}?action=import_status`, {
-                        proxy: config.proxy,
-                    });
-                    if (response.status === 202) {
-                        if (onUpdate && response.data.message !== '') {
-                            onUpdate(response.data.message, response.data.progress || 0);
+        try {
+            await Axios.post(url,
+                new FormData(), {
+                    params,
+                    proxy: config.proxy,
+                    headers: { 'Upload-Start': true },
+                });
+            await chunkUpload(file, uploadConfig);
+            await Axios.post(url,
+                new FormData(), {
+                    params,
+                    proxy: config.proxy,
+                    headers: { 'Upload-Finish': true },
+                });
+            return new Promise((resolve, reject) => {
+                async function requestStatus() {
+                    try {
+                        const response = await Axios.get(url, {
+                            params: { ...params, action: 'import_status' },
+                            proxy: config.proxy,
+                        });
+                        if (response.status === 202) {
+                            if (onUpdate && response.data.message) {
+                                onUpdate(response.data.message, response.data.progress || 0);
+                            }
+                            setTimeout(requestStatus, 3000);
+                        } else if (response.status === 201) {
+                            resolve();
+                        } else {
+                            reject(generateError(response));
                         }
-                        setTimeout(requestStatus, 3000);
-                    } else if (response.status === 201) {
-                        resolve();
-                    } else {
-                        reject(generateError(response));
+                    } catch (error) {
+                        reject(generateError(error));
                     }
-                } catch (error) {
-                    reject(generateError(error));
                 }
-            }
-
-            Axios.post(`${url}?format=${format}`, formData, {
-                proxy: config.proxy,
-            }).then(() => {
                 setTimeout(requestStatus, 2000);
-            }).catch((error) => {
-                reject(generateError(error));
             });
-        });
+        } catch (errorData) {
+            throw generateError(errorData);
+        }
     }
 
     async function exportTask(id) {
@@ -1279,8 +1301,7 @@
                 setTimeout(requestStatus);
             });
         } catch (errorData) {
-            generateError(errorData);
-            return null;
+            throw generateError(errorData);
         }
     }
 
diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py
index 6ccb8cf28..540d8ee91 100644
--- a/cvat/apps/engine/models.py
+++ b/cvat/apps/engine/models.py
@@ -248,6 +248,9 @@ class Project(models.Model):
     def get_project_logs_dirname(self):
         return os.path.join(self.get_project_dirname(), 'logs')
 
+    def get_tmp_dirname(self):
+        return os.path.join(self.get_project_dirname(), "tmp")
+
     def get_client_log_path(self):
         return os.path.join(self.get_project_logs_dirname(), "client.log")
 
diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py
index cca75503b..64ff4f9a4 100644
--- a/cvat/apps/engine/views.py
+++ b/cvat/apps/engine/views.py
@@ -250,7 +250,7 @@ class ServerViewSet(viewsets.ViewSet):
         '200': ProjectSerializer,
     })
 )
-class ProjectViewSet(viewsets.ModelViewSet):
+class ProjectViewSet(viewsets.ModelViewSet, UploadMixin):
     queryset = models.Project.objects.prefetch_related(Prefetch('label_set',
         queryset=models.Label.objects.order_by('id')
     ))
@@ -330,21 +330,13 @@ class ProjectViewSet(viewsets.ModelViewSet):
             '400': OpenApiResponse(description='Failed to import dataset'),
             '405': OpenApiResponse(description='Format is not available'),
         })
-    @action(detail=True, methods=['GET', 'POST'], serializer_class=None,
-        url_path='dataset')
+    @action(detail=True, methods=['GET', 'POST', 'OPTIONS'], serializer_class=None,
+        url_path=r'dataset/?$')
     def dataset(self, request, pk):
-        db_project = self.get_object() # force to call check_object_permissions
-
-        if request.method == 'POST':
-            format_name = request.query_params.get("format", "")
+        self._object = self.get_object() # force to call check_object_permissions
 
-            return _import_project_dataset(
-                request=request,
-                rq_id=f"/api/project/{pk}/dataset_import",
-                rq_func=dm.project.import_dataset_as_project,
-                pk=pk,
-                format_name=format_name,
-            )
+        if request.method == 'POST' or request.method == 'OPTIONS':
+            return self.upload_data(request)
         else:
             action = request.query_params.get("action", "").lower()
             if action in ("import_status",):
@@ -353,12 +345,12 @@ class ProjectViewSet(viewsets.ModelViewSet):
                 if rq_job is None:
                     return Response(status=status.HTTP_404_NOT_FOUND)
                 elif rq_job.is_finished:
-                    os.close(rq_job.meta['tmp_file_descriptor'])
+                    if rq_job.meta['tmp_file_descriptor']: os.close(rq_job.meta['tmp_file_descriptor'])
                     os.remove(rq_job.meta['tmp_file'])
                     rq_job.delete()
                     return Response(status=status.HTTP_201_CREATED)
                 elif rq_job.is_failed:
-                    os.close(rq_job.meta['tmp_file_descriptor'])
+                    if rq_job.meta['tmp_file_descriptor']: os.close(rq_job.meta['tmp_file_descriptor'])
                     os.remove(rq_job.meta['tmp_file'])
                     rq_job.delete()
                     return Response(
@@ -373,7 +365,7 @@ class ProjectViewSet(viewsets.ModelViewSet):
             else:
                 format_name = request.query_params.get("format", "")
                 return _export_annotations(
-                    db_instance=db_project,
+                    db_instance=self._object,
                     rq_id="/api/project/{}/dataset/{}".format(pk, format_name),
                     request=request,
                     action=action,
@@ -382,6 +374,35 @@ class ProjectViewSet(viewsets.ModelViewSet):
                     filename=request.query_params.get("filename", "").lower(),
                 )
 
+    def get_upload_dir(self):
+        if 'dataset' in self.action:
+            return self._object.get_tmp_dirname()
+        return ""
+
+    def upload_finished(self, request):
+        if self.action == 'dataset':
+            format_name = request.query_params.get("format", "")
+            filename = request.query_params.get("filename", "")
+            tmp_dir = self._object.get_tmp_dirname()
+            uploaded_file = None
+            if os.path.isfile(os.path.join(tmp_dir, filename)):
+                uploaded_file = os.path.join(tmp_dir, filename)
+            return _import_project_dataset(
+                request=request,
+                filename=uploaded_file,
+                rq_id=f"/api/project/{self._object.pk}/dataset_import",
+                rq_func=dm.project.import_dataset_as_project,
+                pk=self._object.pk,
+                format_name=format_name,
+            )
+        return Response(data='Unknown upload was finished',
+            status=status.HTTP_400_BAD_REQUEST)
+
+    @action(detail=True, methods=['HEAD', 'PATCH'], url_path='dataset/'+UploadMixin.file_id_regex)
+    def append_dataset_chunk(self, request, pk, file_id):
+        self._object = self.get_object()
+        return self.append_tus_chunk(request, file_id)
+
     @extend_schema(summary='Method allows to download project annotations',
         parameters=[
             OpenApiParameter('format', description='Desired output format name\n'
@@ -1768,7 +1789,7 @@ def _export_annotations(db_instance, rq_id, request, format_name, action, callba
             result_ttl=ttl, failure_ttl=ttl)
     return Response(status=status.HTTP_202_ACCEPTED)
 
-def _import_project_dataset(request, rq_id, rq_func, pk, format_name):
+def _import_project_dataset(request, rq_id, rq_func, pk, format_name, filename=None):
     format_desc = {f.DISPLAY_NAME: f
         for f in dm.views.get_import_formats()}.get(format_name)
     if format_desc is None:
@@ -1781,19 +1802,21 @@ def _import_project_dataset(request, rq_id, rq_func, pk, format_name):
     rq_job = queue.fetch_job(rq_id)
 
     if not rq_job:
-        serializer = DatasetFileSerializer(data=request.data)
-        if serializer.is_valid(raise_exception=True):
-            dataset_file = serializer.validated_data['dataset_file']
-            fd, filename = mkstemp(prefix='cvat_{}'.format(pk))
-            with open(filename, 'wb+') as f:
-                for chunk in dataset_file.chunks():
-                    f.write(chunk)
-
-        rq_job = queue.enqueue_call(
-            func=rq_func,
-            args=(pk, filename, format_name),
-            job_id=rq_id,
-            meta={
+        fd = None
+        if not filename:
+            serializer = DatasetFileSerializer(data=request.data)
+            if serializer.is_valid(raise_exception=True):
+                dataset_file = serializer.validated_data['dataset_file']
+                fd, filename = mkstemp(prefix='cvat_{}'.format(pk))
+                with open(filename, 'wb+') as f:
+                    for chunk in dataset_file.chunks():
+                        f.write(chunk)
+
+        rq_job = queue.enqueue_call(
+            func=rq_func,
+            args=(pk, filename, format_name),
+            job_id=rq_id,
+            meta={
                 'tmp_file': filename,
                 'tmp_file_descriptor': fd,
             },

diff --git a/cvat/apps/iam/permissions.py b/cvat/apps/iam/permissions.py
index 48eba96ed..022a5cc46 100644
--- a/cvat/apps/iam/permissions.py
+++ b/cvat/apps/iam/permissions.py
@@ -492,6 +492,8 @@ class ProjectPermission(OpenPolicyAgentPermission):
             ('retrieve', 'GET'): 'view',
             ('tasks', 'GET'): 'view',
             ('dataset', 'POST'): 'import:dataset',
+            ('append_dataset_chunk', 'HEAD'): 'import:dataset',
+            ('append_dataset_chunk', 'PATCH'): 'import:dataset',
             ('annotations', 'GET'): 'export:annotations',
             ('dataset', 'GET'): 'export:dataset',
             ('export_backup', 'GET'): 'export:backup',
-- 
GitLab
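
For anyone exercising this change by hand, below is a rough Python sketch of the client flow the patch enables, mirroring what cvat-core's importDataset() now does. It is illustrative only: the host, credentials, project id, dataset format, archive name, and chunk size are placeholder assumptions, while the Upload-Start/Upload-Finish headers, the format and filename query parameters, the dataset/<file_id> chunk route, and the import_status polling come from the diff above. The chunk transfer itself follows the TUS 1.0 core protocol (Upload-Length, Upload-Offset, Location) that UploadMixin implements; the exact metadata the server expects may differ.

import base64
import os
import time
from urllib.parse import urljoin

import requests

HOST = 'http://localhost:8080'    # assumed local CVAT instance
PROJECT_ID = 42                   # hypothetical project id
FILE_PATH = 'dataset.zip'         # hypothetical dataset archive
CHUNK_SIZE = 10 * 1024 * 1024     # counterpart of config.uploadChunkSize (MiB)

base = f'{HOST}/api/projects/{PROJECT_ID}/dataset/'
params = {'format': 'CVAT 1.1', 'filename': os.path.basename(FILE_PATH)}

with requests.Session() as session:
    session.auth = ('user', 'password')  # assumed basic auth, for brevity

    # 1. Announce the upload; cvat-core sends the same POST with 'Upload-Start'.
    session.post(base, params=params,
                 headers={'Upload-Start': 'true'}).raise_for_status()

    # 2. TUS creation request: the server allocates a file resource and
    #    returns its URL in the Location header.
    size = os.path.getsize(FILE_PATH)
    metadata = 'filename ' + base64.b64encode(params['filename'].encode()).decode()
    created = session.post(base, headers={
        'Tus-Resumable': '1.0.0',
        'Upload-Length': str(size),
        'Upload-Metadata': metadata,
    })
    created.raise_for_status()
    location = urljoin(base, created.headers['Location'])

    # 3. Append the archive chunk by chunk; on the Django side this hits
    #    append_dataset_chunk (PATCH on dataset/<file_id>).
    offset = 0
    with open(FILE_PATH, 'rb') as f:
        while chunk := f.read(CHUNK_SIZE):
            resp = session.patch(location, data=chunk, headers={
                'Tus-Resumable': '1.0.0',
                'Upload-Offset': str(offset),
                'Content-Type': 'application/offset+octet-stream',
            })
            resp.raise_for_status()
            offset = int(resp.headers['Upload-Offset'])

    # 4. Close the upload; upload_finished() then enqueues the RQ import job.
    session.post(base, params=params,
                 headers={'Upload-Finish': 'true'}).raise_for_status()

    # 5. Poll until the import job finishes (201), like the JS client's
    #    3-second requestStatus() loop.
    while True:
        status_resp = session.get(base, params={**params, 'action': 'import_status'})
        if status_resp.status_code == 201:
            break
        status_resp.raise_for_status()  # 202 passes through; errors raise
        time.sleep(3)

The point of the two bracketing POSTs is that the TUS session between them is resumable: after a dropped connection the client can recover the committed offset with a HEAD request to the same dataset/<file_id> URL (the other method append_dataset_chunk accepts) instead of restarting the whole archive.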