未验证 提交 28aec51a 编写于 作者: R Roman Donchenko 提交者: GitHub

Set a custom User-Agent for queries made by CVAT and CVAT SDK (#5705)

This lets us:

* Download images from hosts that block generic user agents (notably,
Wikimedia sites).

* Easily distinguish queries to `app.cvat.ai` made by CVAT SDK (for
analytics).

Also, as a consequence of the implementation, CVAT can now reuse the
connection when downloading multiple images from the same host.

I can't really think of any benefits from using the custom user agent
for the types of requests that I didn't mention (i.e. hooks, OPA, nuclio),
but for consistency I updated them all.
上级 b9359479
......@@ -50,6 +50,8 @@ Tracks can be exported/imported to/from Datumaro and Sly Pointcloud formats (<ht
(<https://github.com/opencv/cvat/pull/5662>)
- Hide analytics link from non-admin users (<https://github.com/opencv/cvat/pull/5789>)
- Hide notifications on login/logout/register (<https://github.com/opencv/cvat/pull/5788>)
- CVAT and CVAT SDK now use a custom `User-Agent` header in HTTP requests
(<https://github.com/opencv/cvat/issues/5598>)
### Deprecated
- TBD
......
......@@ -28,6 +28,7 @@ docker run --rm -v "$DST_DIR:/local" -u "$(id -u)":"$(id -g)" \
-i "/local/schema/schema.yml" \
--config "/local/${TEMPLATE_DIR_NAME}/generator-config.yml" \
-p "packageVersion=$VERSION" \
-p "httpUserAgent=cvat_sdk/$VERSION" \
-g python \
-o "/local/"
......
......@@ -16,7 +16,6 @@ import shutil
from distutils.dir_util import copy_tree
from urllib import parse as urlparse
from urllib import request as urlrequest
import requests
import ipaddress
import dns.resolver
import django_rq
......@@ -31,6 +30,7 @@ from cvat.apps.engine.log import slogger
from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter,
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort)
from cvat.apps.engine.utils import av_scan_paths, get_rq_job_meta
from cvat.utils.http import make_requests_session
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager, is_manifest
from utils.dataset_manifest.core import VideoManifestValidator
from utils.dataset_manifest.utils import detect_related_images
......@@ -343,24 +343,26 @@ def _validate_url(url):
def _download_data(urls, upload_dir):
    """Download every URL in ``urls`` into ``upload_dir``.

    Each file is stored under the basename of the URL's path component.
    All requests go through a single session obtained from
    ``make_requests_session()``, so the session is shared by every download
    in the batch.

    Args:
        urls: Iterable of URL strings to fetch.
        upload_dir: Directory the downloaded files are written to.

    Returns:
        The list of stored file names, in the order they were downloaded.

    Raises:
        Exception: If two URLs map to the same file name, or if a response
            status is not 200. Whatever ``_validate_url`` raises propagates
            unchanged.
    """
    job = rq.get_current_job()
    local_files = {}  # used as an insertion-ordered set of stored names

    with make_requests_session() as session:
        for url in urls:
            name = os.path.basename(urlrequest.url2pathname(urlparse.urlparse(url).path))
            if name in local_files:
                raise Exception("filename collision: {}".format(name))
            _validate_url(url)
            slogger.glob.info("Downloading: {}".format(url))
            job.meta['status'] = '{} is being downloaded..'.format(url)
            job.save_meta()

            # With stream=True the body is not read up front; closing the
            # response explicitly guarantees its connection is released even
            # when we bail out on a non-200 status or a write error.
            with session.get(url, stream=True) as response:
                if response.status_code != 200:
                    raise Exception("Failed to download " + url)
                # Ask the raw stream to undo any Content-Encoding (e.g. gzip)
                # while it is being copied to disk.
                response.raw.decode_content = True
                with open(os.path.join(upload_dir, name), 'wb') as output_file:
                    shutil.copyfileobj(response.raw, output_file)

            # Record the name only after the file has been written.
            local_files[name] = True

    return list(local_files.keys())
......
......@@ -12,14 +12,17 @@ from health_check.exceptions import ServiceReturnedUnexpectedResult, ServiceUnav
from django.conf import settings
from django.core.cache import CacheKeyWarning, caches
from cvat.utils.http import make_requests_session
class OPAHealthCheck(BaseHealthCheckBackend):
critical_service = True
def check_status(self):
opa_health_url = f'{settings.IAM_OPA_HOST}/health?bundles'
try:
response = requests.get(opa_health_url)
response.raise_for_status()
with make_requests_session() as session:
response = session.get(opa_health_url)
response.raise_for_status()
except requests.RequestException as e:
raise HealthCheckException(str(e))
......
......@@ -11,7 +11,6 @@ from collections import namedtuple
from enum import Enum
from typing import Any, List, Optional, Sequence, cast
import requests
from attrs import define, field
from django.conf import settings
from django.db.models import Q
......@@ -21,6 +20,7 @@ from rest_framework.permissions import BasePermission
from cvat.apps.organizations.models import Membership, Organization
from cvat.apps.engine.models import Label, Project, Task, Job, Issue
from cvat.apps.webhooks.models import WebhookTypeChoice
from cvat.utils.http import make_requests_session
class StrEnum(str, Enum):
......@@ -107,8 +107,9 @@ class OpenPolicyAgentPermission(metaclass=ABCMeta):
return None
def check_access(self) -> PermissionResult:
response = requests.post(self.url, json=self.payload)
output = response.json()['result']
with make_requests_session() as session:
response = session.post(self.url, json=self.payload)
output = response.json()['result']
allow = False
reasons = []
......@@ -124,14 +125,17 @@ class OpenPolicyAgentPermission(metaclass=ABCMeta):
def filter(self, queryset):
url = self.url.replace('/allow', '/filter')
r = requests.post(url, json=self.payload)
with make_requests_session() as session:
r = session.post(url, json=self.payload).json()['result']
q_objects = []
ops_dict = {
'|': operator.or_,
'&': operator.and_,
'~': operator.not_,
}
for item in r.json()['result']:
for item in r:
if isinstance(item, str):
val1 = q_objects.pop()
if item == '~':
......@@ -384,9 +388,12 @@ class EventsPermission(OpenPolicyAgentPermission):
def filter(self, query_params):
url = self.url.replace('/allow', '/filter')
r = requests.post(url, json=self.payload)
with make_requests_session() as session:
r = session.post(url, json=self.payload).json()['result']
filter_params = query_params.copy()
for query in r.json()['result']:
for query in r:
for attr, value in query.items():
if filter_params.get(attr, value) != value:
raise PermissionDenied(f"You don't have permission to view events with {attr}={filter_params.get(attr)}")
......
......@@ -26,6 +26,7 @@ from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.models import Job, Task
from cvat.apps.engine.serializers import LabeledDataSerializer
from cvat.apps.engine.models import ShapeType, SourceType
from cvat.utils.http import make_requests_session
from drf_spectacular.utils import (extend_schema, extend_schema_view,
OpenApiResponse, OpenApiParameter, inline_serializer)
......@@ -65,10 +66,11 @@ class LambdaGateway:
else:
url = NUCLIO_GATEWAY
reply = getattr(requests, method)(url, headers=extra_headers,
timeout=NUCLIO_TIMEOUT, json=data)
reply.raise_for_status()
response = reply.json()
with make_requests_session() as session:
reply = session.request(method, url, headers=extra_headers,
timeout=NUCLIO_TIMEOUT, json=data)
reply.raise_for_status()
response = reply.json()
return response
......@@ -97,9 +99,11 @@ class LambdaGateway:
url = f'http://host.docker.internal:{func.port}'
else:
url = f'http://localhost:{func.port}'
reply = requests.post(url, timeout=NUCLIO_TIMEOUT, json=payload)
reply.raise_for_status()
response = reply.json()
with make_requests_session() as session:
reply = session.post(url, timeout=NUCLIO_TIMEOUT, json=payload)
reply.raise_for_status()
response = reply.json()
return response
......
......@@ -15,6 +15,7 @@ from django.conf import settings
from cvat.apps.engine.models import Project
from cvat.apps.engine.serializers import BasicUserSerializer
from cvat.apps.organizations.models import Organization
from cvat.utils.http import make_requests_session
from .event_type import EventTypeChoice, event_name
from .models import Webhook, WebhookDelivery, WebhookTypeChoice
......@@ -43,16 +44,17 @@ def send_webhook(webhook, payload, delivery):
response_body = None
try:
response = requests.post(
webhook.target_url,
json=payload,
verify=webhook.enable_ssl,
headers=headers,
timeout=WEBHOOK_TIMEOUT,
stream=True,
)
status_code = response.status_code
response_body = response.raw.read(RESPONSE_SIZE_LIMIT + 1, decode_content=True)
with make_requests_session() as session:
response = session.post(
webhook.target_url,
json=payload,
verify=webhook.enable_ssl,
headers=headers,
timeout=WEBHOOK_TIMEOUT,
stream=True,
)
status_code = response.status_code
response_body = response.raw.read(RESPONSE_SIZE_LIMIT + 1, decode_content=True)
except requests.ConnectionError:
status_code = HTTPStatus.BAD_GATEWAY
except requests.Timeout:
......
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
import requests
import requests.utils
from cvat import __version__
_CVAT_USER_AGENT = f"CVAT/{__version__} {requests.utils.default_user_agent()}"
def make_requests_session() -> requests.Session:
    """Create a ``requests.Session`` that identifies itself as CVAT.

    Returns:
        A new session whose ``User-Agent`` header is set to
        ``_CVAT_USER_AGENT``. The caller owns the session and is responsible
        for closing it (for example by using it as a context manager).
    """
    cvat_session = requests.Session()
    cvat_session.headers.update({'User-Agent': _CVAT_USER_AGENT})
    return cvat_session
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册