未验证 提交 6fad1764 编写于 作者: A Andrey Zhavoronkov 提交者: GitHub

Added validation for URLs which are used as remote data sources (#4387)

上级 a3bc6ed2
...@@ -81,6 +81,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ...@@ -81,6 +81,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Security ### Security
- Updated ELK to 6.8.23 which uses log4j 2.17.1 (<https://github.com/openvinotoolkit/cvat/pull/4206>) - Updated ELK to 6.8.23 which uses log4j 2.17.1 (<https://github.com/openvinotoolkit/cvat/pull/4206>)
- Added validation for URLs which are used as remote data sources (<https://github.com/openvinotoolkit/cvat/pull/4387>)
## \[1.7.0] - 2021-11-15 ## \[1.7.0] - 2021-11-15
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
import itertools import itertools
import os import os
import sys import sys
from rest_framework.serializers import ValidationError
import rq import rq
import re import re
import shutil import shutil
...@@ -14,6 +15,8 @@ from traceback import print_exception ...@@ -14,6 +15,8 @@ from traceback import print_exception
from urllib import parse as urlparse from urllib import parse as urlparse
from urllib import request as urlrequest from urllib import request as urlrequest
import requests import requests
import ipaddress
import dns.resolver
import django_rq import django_rq
from django.conf import settings from django.conf import settings
...@@ -203,6 +206,45 @@ def _validate_manifest(manifests, root_dir): ...@@ -203,6 +206,45 @@ def _validate_manifest(manifests, root_dir):
raise Exception('Invalid manifest was uploaded') raise Exception('Invalid manifest was uploaded')
return None return None
def _validate_url(url):
def _validate_ip_address(ip_address):
if not ip_address.is_global:
raise ValidationError('Non public IP address \'{}\' is provided!'.format(ip_address))
ALLOWED_SCHEMES = ['http', 'https']
parsed_url = urlparse.urlparse(url)
if parsed_url.scheme not in ALLOWED_SCHEMES:
raise ValueError('Unsupported URL sheme: {}. Only http and https are supported'.format(parsed_url.scheme))
try:
ip_address = ipaddress.ip_address(parsed_url.hostname)
_validate_ip_address(ip_address)
except ValueError as _:
ip_v4_records = None
ip_v6_records = None
try:
ip_v4_records = dns.resolver.query(parsed_url.hostname, 'A')
for record in ip_v4_records:
_validate_ip_address(ipaddress.ip_address(record.to_text()))
except ValidationError:
raise
except Exception as e:
slogger.glob.info('Cannot get A record for domain \'{}\': {}'.format(parsed_url.hostname, e))
try:
ip_v6_records = dns.resolver.query(parsed_url.hostname, 'AAAA')
for record in ip_v6_records:
_validate_ip_address(ipaddress.ip_address(record.to_text()))
except ValidationError:
raise
except Exception as e:
slogger.glob.info('Cannot get AAAA record for domain \'{}\': {}'.format(parsed_url.hostname, e))
if not ip_v4_records and not ip_v6_records:
raise ValidationError('Cannot resolve IP address for domain \'{}\''.format(parsed_url.hostname))
def _download_data(urls, upload_dir): def _download_data(urls, upload_dir):
job = rq.get_current_job() job = rq.get_current_job()
local_files = {} local_files = {}
...@@ -210,6 +252,7 @@ def _download_data(urls, upload_dir): ...@@ -210,6 +252,7 @@ def _download_data(urls, upload_dir):
name = os.path.basename(urlrequest.url2pathname(urlparse.urlparse(url).path)) name = os.path.basename(urlrequest.url2pathname(urlparse.urlparse(url).path))
if name in local_files: if name in local_files:
raise Exception("filename collision: {}".format(name)) raise Exception("filename collision: {}".format(name))
_validate_url(url)
slogger.glob.info("Downloading: {}".format(url)) slogger.glob.info("Downloading: {}".format(url))
job.meta['status'] = '{} is being downloaded..'.format(url) job.meta['status'] = '{} is being downloaded..'.format(url)
job.save_meta() job.save_meta()
......
...@@ -52,3 +52,4 @@ datumaro==0.2.0 --no-binary=datumaro ...@@ -52,3 +52,4 @@ datumaro==0.2.0 --no-binary=datumaro
urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability
natsort==8.0.0 natsort==8.0.0
mistune>=2.0.1 # not directly required, pinned by Snyk to avoid a vulnerability mistune>=2.0.1 # not directly required, pinned by Snyk to avoid a vulnerability
dnspython==2.2.0
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册