Unverified commit 6a29b344, authored by Jason Wu, committed by GitHub

Support of Google Cloud Storage for cloud storage (#3561)

* Support Google Cloud Storage as a cloud provider

Implement GoogleCloudStorage and add KEY_FILE_PATH to
CredentialsTypeChoice, and key_file_path in Credentials

* Handle cloud storage keys that contain a path separator

* Update changelog

* Add the migration file for engine_cloud_provider table
Parent 54468167
@@ -50,6 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Explicit "Done" button when drawing any polyshapes (<https://github.com/openvinotoolkit/cvat/pull/3417>)
 - Histogram equalization with OpenCV javascript (<https://github.com/openvinotoolkit/cvat/pull/3447>)
 - Client-side polyshapes approximation when using semi-automatic interactors & scissors (<https://github.com/openvinotoolkit/cvat/pull/3450>)
+- Support of Google Cloud Storage for cloud storage (<https://github.com/openvinotoolkit/cvat/pull/3561>)

 ### Changed
......
@@ -87,7 +87,7 @@ class CacheInteraction:
             name = f"{item['name']}{item['extension']}"
             if name not in cloud_storage_instance:
                 raise Exception('{} file was not found on a {} storage'.format(name, cloud_storage_instance.name))
-            with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=name, delete=False) as temp_file:
+            with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=name.replace(os.path.sep, '#'), delete=False) as temp_file:
                 source_path = temp_file.name
                 buf = cloud_storage_instance.download_fileobj(name)
                 temp_file.write(buf.getvalue())
......
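Why the suffix needs sanitizing: tempfile joins the prefix, a random token, and the suffix into one path, so an object key containing a path separator would point into a directory that does not exist. A minimal sketch of the failure and of the replace() fix on a POSIX system (the key name here is a made-up example):

import os
from tempfile import NamedTemporaryFile

key = 'images/0001.jpg'  # object keys may contain '/'

try:
    NamedTemporaryFile(prefix='cvat', suffix=key)
except FileNotFoundError:
    # tempfile tried to create ".../cvatXXXXXXXXimages/0001.jpg",
    # but the "cvatXXXXXXXXimages" directory does not exist
    pass

with NamedTemporaryFile(prefix='cvat', suffix=key.replace(os.path.sep, '#')) as temp_file:
    print(temp_file.name)  # e.g. /tmp/cvatXXXXXXXXimages#0001.jpg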
 #from dataclasses import dataclass
 from abc import ABC, abstractmethod, abstractproperty
 from io import BytesIO
+import os
+import os.path
 import boto3
 from boto3.s3.transfer import TransferConfig
@@ -11,6 +13,8 @@ from azure.storage.blob import BlobServiceClient
 from azure.core.exceptions import ResourceExistsError
 from azure.storage.blob import PublicAccess
+from google.cloud import storage
+
 from cvat.apps.engine.log import slogger
 from cvat.apps.engine.models import CredentialsTypeChoice, CloudProviderChoice
@@ -42,6 +46,7 @@ class _CloudStorage(ABC):
     def download_file(self, key, path):
         file_obj = self.download_fileobj(key)
         if isinstance(file_obj, BytesIO):
+            os.makedirs(os.path.dirname(path), exist_ok=True)
             with open(path, 'wb') as f:
                 f.write(file_obj.getvalue())
         else:
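The added os.makedirs(..., exist_ok=True) call lets download_file() write to a destination whose parent directories do not exist yet. A standalone sketch of the same pattern; the guard for an empty dirname is an addition of this sketch (os.path.dirname() returns '' for a bare filename, which os.makedirs() rejects), while the CVAT caller always passes a full path:

import os

def save_bytes(path, data):
    # mirror of download_file(): create missing parent directories first
    dir_name = os.path.dirname(path)
    if dir_name:  # os.makedirs('') would raise FileNotFoundError
        os.makedirs(dir_name, exist_ok=True)
    with open(path, 'wb') as f:
        f.write(data)

save_bytes('/tmp/cvat-demo/frames/0001.jpg', b'demo-bytes')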
@@ -77,6 +82,14 @@ def get_cloud_storage_instance(cloud_provider, resource, credentials, specific_attributes=None):
             account_name=credentials.account_name,
             sas_token=credentials.session_token
         )
+    elif cloud_provider == CloudProviderChoice.GOOGLE_CLOUD_STORAGE:
+        instance = GoogleCloudStorage(
+            bucket_name=resource,
+            service_account_json=credentials.key_file_path,
+            prefix=specific_attributes.get('prefix'),
+            location=specific_attributes.get('location'),
+            project=specific_attributes.get('project')
+        )
     else:
         raise NotImplementedError()
     return instance
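For context, a hedged sketch of how the new branch is reached; every concrete value below is made up for illustration, only the call shape follows the code above:

credentials = Credentials(
    key_file_path='/home/django/keys/service_account.json',  # hypothetical path
    credentials_type=CredentialsTypeChoice.KEY_FILE_PATH,
)
cloud_storage_instance = get_cloud_storage_instance(
    cloud_provider=CloudProviderChoice.GOOGLE_CLOUD_STORAGE,
    resource='my-annotation-bucket',  # bucket name
    credentials=credentials,
    specific_attributes={
        'prefix': 'tasks/',
        'location': 'us-central1',
        'project': 'my-gcp-project',
    },
)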
@@ -256,14 +269,89 @@ class AzureBlobContainer(_CloudStorage):
 class GOOGLE_DRIVE(_CloudStorage):
     pass

+class GoogleCloudStorage(_CloudStorage):
+
+    def __init__(self, bucket_name, prefix=None, service_account_json=None, project=None, location=None):
+        super().__init__()
+        if service_account_json:
+            self._storage_client = storage.Client.from_service_account_json(service_account_json)
+        else:
+            self._storage_client = storage.Client()
+        bucket = self._storage_client.lookup_bucket(bucket_name)
+        if bucket is None:
+            bucket = self._storage_client.bucket(bucket_name, user_project=project)
+        self._bucket = bucket
+        self._bucket_location = location
+        self._prefix = prefix
+
+    @property
+    def bucket(self):
+        return self._bucket
+
+    @property
+    def name(self):
+        return self._bucket.name
+
+    def exists(self):
+        return self._storage_client.lookup_bucket(self.name) is not None
+
+    def initialize_content(self):
+        self._files = [
+            {
+                'name': blob.name
+            }
+            for blob in self._storage_client.list_blobs(
+                self.bucket, prefix=self._prefix
+            )
+        ]
+
+    def download_fileobj(self, key):
+        buf = BytesIO()
+        blob = self.bucket.blob(key)
+        self._storage_client.download_blob_to_file(blob, buf)
+        buf.seek(0)
+        return buf
+
+    def is_object_exist(self, key):
+        return self.bucket.blob(key).exists()
+
+    def upload_file(self, file_obj, file_name):
+        self.bucket.blob(file_name).upload_from_file(file_obj)
+
+    def create(self):
+        try:
+            self._bucket = self._storage_client.create_bucket(
+                self.bucket,
+                location=self._bucket_location
+            )
+            slogger.glob.info(
+                'Bucket {} has been created at {} region for {}'.format(
+                    self.name,
+                    self.bucket.location,
+                    self.bucket.user_project,
+                ))
+        except Exception as ex:
+            msg = str(ex)
+            slogger.glob.info(msg)
+            raise Exception(msg)
+
+    def get_file_last_modified(self, key):
+        blob = self.bucket.blob(key)
+        blob.reload()
+        return blob.updated
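A short usage sketch for the class above; the bucket name, key file path, and object key are hypothetical. The Google client calls it relies on (Client.from_service_account_json, lookup_bucket, list_blobs, download_blob_to_file) all exist in the google-cloud-storage package pinned in the requirements change below:

gcs = GoogleCloudStorage(
    bucket_name='my-annotation-bucket',
    prefix='tasks/',
    service_account_json='/home/django/keys/service_account.json',
)
if gcs.exists():                 # the bucket is visible to this account
    gcs.initialize_content()     # caches one {'name': ...} entry per blob
    if gcs.is_object_exist('tasks/0001.jpg'):
        buf = gcs.download_fileobj('tasks/0001.jpg')  # returns BytesIO
        print(len(buf.getvalue()), 'bytes downloaded')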
 class Credentials:
-    __slots__ = ('key', 'secret_key', 'session_token', 'account_name', 'credentials_type')
+    __slots__ = ('key', 'secret_key', 'session_token', 'account_name', 'key_file_path', 'credentials_type')

     def __init__(self, **credentials):
         self.key = credentials.get('key', '')
         self.secret_key = credentials.get('secret_key', '')
         self.session_token = credentials.get('session_token', '')
         self.account_name = credentials.get('account_name', '')
+        self.key_file_path = credentials.get('key_file_path', '')
         self.credentials_type = credentials.get('credentials_type', None)

     def convert_to_db(self):
@@ -271,6 +359,7 @@ class Credentials:
             CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_SET : \
                 " ".join([self.key, self.secret_key, self.session_token]),
             CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR : " ".join([self.account_name, self.session_token]),
+            CredentialsTypeChoice.KEY_FILE_PATH: self.key_file_path,
             CredentialsTypeChoice.ANONYMOUS_ACCESS: "",
         }
         return converted_credentials[self.credentials_type]
@@ -281,6 +370,8 @@ class Credentials:
             self.key, self.secret_key, self.session_token = credentials.get('value').split()
         elif self.credentials_type == CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR:
             self.account_name, self.session_token = credentials.get('value').split()
+        elif self.credentials_type == CredentialsTypeChoice.KEY_FILE_PATH:
+            self.key_file_path = credentials.get('value')
         else:
             self.account_name, self.session_token, self.key, self.secret_key = ('', '', '', '')
             self.credentials_type = None
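Unlike the space-joined credential types above it, the KEY_FILE_PATH branch stores and restores the value verbatim, so a path containing spaces survives the round trip. A sketch, assuming convert_from_db() accepts the {'type': ..., 'value': ...} mapping the serializer passes to it further below:

creds = Credentials(
    key_file_path='/home/django/keys/service account.json',  # note the space
    credentials_type=CredentialsTypeChoice.KEY_FILE_PATH,
)
stored = creds.convert_to_db()  # the path itself, no " ".join() involved

restored = Credentials(credentials_type=CredentialsTypeChoice.KEY_FILE_PATH)
restored.convert_from_db({'type': CredentialsTypeChoice.KEY_FILE_PATH, 'value': stored})
assert restored.key_file_path == stored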
@@ -291,6 +382,7 @@ class Credentials:
         self.secret_key = credentials.get('secret_key', self.secret_key)
         self.session_token = credentials.get('session_token', self.session_token)
         self.account_name = credentials.get('account_name', self.account_name)
+        self.key_file_path = credentials.get('key_file_path', self.key_file_path)

     def values(self):
-        return [self.key, self.secret_key, self.session_token, self.account_name]
+        return [self.key, self.secret_key, self.session_token, self.account_name, self.key_file_path]
# Generated by Django 3.1.13 on 2021-08-27 02:58

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('engine', '0040_cloud_storage'),
    ]

    operations = [
        migrations.AlterField(
            model_name='cloudstorage',
            name='credentials_type',
            field=models.CharField(choices=[('TEMP_KEY_SECRET_KEY_TOKEN_SET', 'TEMP_KEY_SECRET_KEY_TOKEN_SET'), ('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'), ('KEY_FILE_PATH', 'KEY_FILE_PATH'), ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS')], max_length=29),
        ),
        migrations.AlterField(
            model_name='cloudstorage',
            name='provider_type',
            field=models.CharField(choices=[('AWS_S3_BUCKET', 'AWS_S3'), ('AZURE_CONTAINER', 'AZURE_CONTAINER'), ('GOOGLE_DRIVE', 'GOOGLE_DRIVE'), ('GOOGLE_CLOUD_STORAGE', 'GOOGLE_CLOUD_STORAGE')], max_length=20),
        ),
    ]
@@ -542,6 +542,7 @@ class CloudProviderChoice(str, Enum):
     AWS_S3 = 'AWS_S3_BUCKET'
     AZURE_CONTAINER = 'AZURE_CONTAINER'
     GOOGLE_DRIVE = 'GOOGLE_DRIVE'
+    GOOGLE_CLOUD_STORAGE = 'GOOGLE_CLOUD_STORAGE'

     @classmethod
     def choices(cls):
@@ -558,6 +559,7 @@ class CredentialsTypeChoice(str, Enum):
     # ignore bandit issues because false positives
     TEMP_KEY_SECRET_KEY_TOKEN_SET = 'TEMP_KEY_SECRET_KEY_TOKEN_SET' # nosec
     ACCOUNT_NAME_TOKEN_PAIR = 'ACCOUNT_NAME_TOKEN_PAIR' # nosec
+    KEY_FILE_PATH = 'KEY_FILE_PATH'
     ANONYMOUS_ACCESS = 'ANONYMOUS_ACCESS'

     @classmethod
......
@@ -792,6 +792,7 @@ class CloudStorageSerializer(serializers.ModelSerializer):
     session_token = serializers.CharField(max_length=440, allow_blank=True, required=False)
     key = serializers.CharField(max_length=20, allow_blank=True, required=False)
     secret_key = serializers.CharField(max_length=40, allow_blank=True, required=False)
+    key_file_path = serializers.CharField(max_length=64, allow_blank=True, required=False)
     account_name = serializers.CharField(max_length=24, allow_blank=True, required=False)

     class Meta:
@@ -799,7 +800,7 @@ class CloudStorageSerializer(serializers.ModelSerializer):
         fields = (
             'provider_type', 'resource', 'display_name', 'owner', 'credentials_type',
             'created_date', 'updated_date', 'session_token', 'account_name', 'key',
-            'secret_key', 'specific_attributes', 'description'
+            'secret_key', 'key_file_path', 'specific_attributes', 'description'
         )
         read_only_fields = ('created_date', 'updated_date', 'owner')
@@ -816,6 +817,9 @@ class CloudStorageSerializer(serializers.ModelSerializer):
         if attrs.get('provider_type') == models.CloudProviderChoice.AZURE_CONTAINER:
             if not attrs.get('account_name', ''):
                 raise serializers.ValidationError('Account name for Azure container was not specified')
+        if attrs.get('provider_type') == models.CloudProviderChoice.GOOGLE_CLOUD_STORAGE:
+            if not attrs.get('key_file_path', ''):
+                raise serializers.ValidationError('Key file path for Google cloud storage was not specified')
         return attrs

     def create(self, validated_data):
@@ -826,6 +830,7 @@ class CloudStorageSerializer(serializers.ModelSerializer):
             key=validated_data.pop('key', ''),
             secret_key=validated_data.pop('secret_key', ''),
             session_token=validated_data.pop('session_token', ''),
+            key_file_path=validated_data.pop('key_file_path', ''),
             credentials_type = validated_data.get('credentials_type')
         )
         if should_be_created:
@@ -859,7 +864,7 @@ class CloudStorageSerializer(serializers.ModelSerializer):
             'type': instance.credentials_type,
             'value': instance.credentials,
         })
-        tmp = {k:v for k,v in validated_data.items() if k in {'key', 'secret_key', 'account_name', 'session_token', 'credentials_type'}}
+        tmp = {k:v for k,v in validated_data.items() if k in {'key', 'secret_key', 'account_name', 'session_token', 'key_file_path', 'credentials_type'}}
         credentials.mapping_with_new_values(tmp)
         instance.credentials = credentials.convert_to_db()
         instance.credentials_type = validated_data.get('credentials_type', instance.credentials_type)
......
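Putting the serializer changes together, a client-side sketch of creating a GCS-backed storage. The endpoint path, the auth credentials, and the specific_attributes query-string format are assumptions for illustration, not taken from this diff:

import requests

payload = {
    'provider_type': 'GOOGLE_CLOUD_STORAGE',
    'resource': 'my-annotation-bucket',
    'display_name': 'GCS demo storage',
    'credentials_type': 'KEY_FILE_PATH',
    'key_file_path': '/home/django/keys/service_account.json',  # max_length=64 above
    'specific_attributes': 'prefix=tasks/&location=us-central1',  # assumed format
}
response = requests.post(
    'http://localhost:8080/api/v1/cloudstorages',  # assumed endpoint
    json=payload,
    auth=('user', 'password'),  # assumed credentials
)
response.raise_for_status()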
@@ -1262,7 +1262,8 @@ class CloudStorageViewSet(auth.CloudStorageGetQuerySetMixin, viewsets.ModelViewSet):
             session_token=serializer.validated_data.get('session_token', ''),
             account_name=serializer.validated_data.get('account_name', ''),
             key=serializer.validated_data.get('key', ''),
-            secret_key=serializer.validated_data.get('secret_key', '')
+            secret_key=serializer.validated_data.get('secret_key', ''),
+            key_file_path=serializer.validated_data.get('key_file_path', '')
         )
         details = {
             'resource': serializer.validated_data.get('resource'),
......
@@ -47,6 +47,7 @@ diskcache==5.0.2
 open3d==0.11.2
 boto3==1.17.61
 azure-storage-blob==12.8.1
+google-cloud-storage==1.42.0
 # --no-binary=datumaro: workaround for pip to install
 # opencv-headless instead of regular opencv, to actually run setup script
 # --no-binary=pycocotools: workaround for binary incompatibility on numpy 1.20
......