Commit d61fbcc8 authored by Konstantin Lopuhin

Support headers in S3FilesStore.persist_file for botocore

Parent 617631f2
@@ -29,6 +29,7 @@ from scrapy.utils.log import failure_to_exc_info
 from scrapy.utils.python import to_bytes
 from scrapy.utils.request import referer_str
 from scrapy.utils.boto import is_botocore
+from scrapy.utils.datatypes import CaselessDict
 
 logger = logging.getLogger(__name__)
@@ -137,13 +138,17 @@ class S3FilesStore(object):
         key_name = '%s%s' % (self.prefix, path)
         buf.seek(0)
         if self.is_botocore:
+            extra = self._headers_to_botocore_kwargs(self.HEADERS)
+            if headers:
+                extra.update(self._headers_to_botocore_kwargs(headers))
             return threads.deferToThread(
                 self.s3_client.put_object,
                 Bucket=self.bucket,
                 Key=key_name,
                 Body=buf,
                 Metadata={k: str(v) for k, v in six.iteritems(meta)},
-                ACL=self.POLICY)
+                ACL=self.POLICY,
+                **extra)
         else:
             b = self._get_boto_bucket()
             k = b.new_key(key_name)
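
For context, here is a minimal sketch of the call the botocore branch ends up making once the default and per-file headers are merged and converted. The client setup, bucket, key, and header values are hypothetical illustrations, not part of the commit; the sketch assumes botocore is installed and AWS credentials and region are configured in the environment.

    from io import BytesIO
    import botocore.session

    # Hypothetical client setup; in Scrapy the store creates this client.
    session = botocore.session.get_session()
    s3_client = session.create_client('s3')

    buf = BytesIO(b'file body')
    # Result of converting the merged headers,
    # e.g. Cache-Control -> CacheControl (see the helper below).
    extra = {'CacheControl': 'max-age=172800', 'ContentType': 'image/png'}

    s3_client.put_object(
        Bucket='example-bucket',   # hypothetical bucket
        Key='files/example.png',   # hypothetical key name
        Body=buf,
        Metadata={'foo': 'bar'},
        ACL='private',
        **extra)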
@@ -157,6 +162,35 @@ class S3FilesStore(object):
             k.set_contents_from_string, buf.getvalue(),
             headers=h, policy=self.POLICY)
 
+    def _headers_to_botocore_kwargs(self, headers):
+        """ Convert headers to botocore keyword arguments.
+        """
+        # This is required while we need to support both boto and botocore.
+        mapping = CaselessDict({
+            'Content-Type': 'ContentType',
+            'Cache-Control': 'CacheControl',
+            'Content-Disposition': 'ContentDisposition',
+            'Content-Encoding': 'ContentEncoding',
+            'Content-Language': 'ContentLanguage',
+            'Content-Length': 'ContentLength',
+            'Content-MD5': 'ContentMD5',
+            'Expires': 'Expires',
+            'X-Amz-Grant-Full-Control': 'GrantFullControl',
+            'X-Amz-Grant-Read': 'GrantRead',
+            'X-Amz-Grant-Read-ACP': 'GrantReadACP',
+            'X-Amz-Grant-Write-ACP': 'GrantWriteACP',
+        })
+        extra = {}
+        for key, value in six.iteritems(headers):
+            try:
+                kwarg = mapping[key]
+            except KeyError:
+                raise TypeError(
+                    'Header "%s" is not supported by botocore' % key)
+            else:
+                extra[kwarg] = value
+        return extra
+
 
 class FilesPipeline(MediaPipeline):
     """Abstract pipeline that implement the file downloading
......
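
A quick usage sketch of the new helper (the store URI is hypothetical). Because the mapping is a CaselessDict, header-name casing does not matter, while a header with no botocore equivalent fails fast with a TypeError:

    store = S3FilesStore('s3://example-bucket/prefix/')  # hypothetical URI

    store._headers_to_botocore_kwargs(
        {'content-type': 'image/png', 'CACHE-CONTROL': 'max-age=172800'})
    # -> {'ContentType': 'image/png', 'CacheControl': 'max-age=172800'}

    store._headers_to_botocore_kwargs({'X-Custom-Header': 'value'})
    # raises TypeError: Header "X-Custom-Header" is not supported by botocore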
@@ -26,7 +26,7 @@ def skip_if_no_boto():
     except NotConfigured as e:
         raise SkipTest(e.message)
 
-def get_s3_content_and_delete(bucket, path):
+def get_s3_content_and_delete(bucket, path, with_key=False):
     """ Get content from s3 key, and delete key afterwards.
     """
     if is_botocore():
@@ -43,7 +43,7 @@ def get_s3_content_and_delete(bucket, path):
         key = bucket.get_key(path)
         content = key.get_contents_as_string()
         bucket.delete_key(path)
-    return content
+    return (content, key) if with_key else content
 
 
 def get_crawler(spidercls=None, settings_dict=None):
     """Return an unconfigured Crawler object. If settings_dict is given, it
......
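
A sketch of how callers consume the extended helper; existing call sites are unaffected, and tests that need to inspect stored headers opt in with with_key=True (bucket and path here are hypothetical). The returned key is a botocore response dict or a boto Key object, depending on which backend is in use:

    # Existing callers keep the old single return value:
    content = get_s3_content_and_delete('example-bucket', 'files/example.png')

    # New callers also receive the key for further assertions:
    content, key = get_s3_content_and_delete(
        'example-bucket', 'files/example.png', with_key=True)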
@@ -16,6 +16,7 @@ from scrapy.http import Request, Response
 from scrapy.settings import Settings
 from scrapy.utils.python import to_bytes
 from scrapy.utils.test import assert_aws_environ, get_s3_content_and_delete
+from scrapy.utils.boto import is_botocore
 
 from tests import mock
@@ -194,14 +195,27 @@ class TestS3FilesStore(unittest.TestCase):
         meta = {'foo': 'bar'}
         path = ''
         store = S3FilesStore(uri)
-        yield store.persist_file(path, buf, info=None, meta=meta)
+        yield store.persist_file(
+            path, buf, info=None, meta=meta,
+            headers={'Content-Type': 'image/png'})
         s = yield store.stat_file(path, info=None)
         self.assertIn('last_modified', s)
         self.assertIn('checksum', s)
         self.assertEqual(s['checksum'], '3187896a9657a28163abb31667df64c8')
         u = urlparse(uri)
-        content = get_s3_content_and_delete(u.hostname, u.path[1:])
+        content, key = get_s3_content_and_delete(
+            u.hostname, u.path[1:], with_key=True)
         self.assertEqual(content, data)
+        if is_botocore():
+            self.assertEqual(key['Metadata'], {'foo': 'bar'})
+            self.assertEqual(
+                key['CacheControl'], S3FilesStore.HEADERS['Cache-Control'])
+            self.assertEqual(key['ContentType'], 'image/png')
+        else:
+            self.assertEqual(key.metadata, {'foo': 'bar'})
+            self.assertEqual(
+                key.cache_control, S3FilesStore.HEADERS['Cache-Control'])
+            self.assertEqual(key.content_type, 'image/png')
 
 
 class ItemWithFiles(Item):
......