提交 eaf3a239 作者: Konstantin Lopuhin

using botocore for s3 request signing: proof of concept

上级 41588397
import six
from six.moves.urllib.parse import unquote
from scrapy.exceptions import NotConfigured
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.python import to_unicode
from .http import HTTPDownloadHandler
......@@ -37,10 +39,6 @@ class S3DownloadHandler(object):
def __init__(self, settings, aws_access_key_id=None, aws_secret_access_key=None, \
httpdownloadhandler=HTTPDownloadHandler, **kw):
_S3Connection = get_s3_connection()
if _S3Connection is None:
raise NotConfigured("missing boto library")
if not aws_access_key_id:
aws_access_key_id = settings['AWS_ACCESS_KEY_ID']
if not aws_secret_access_key:
......@@ -53,10 +51,27 @@ class S3DownloadHandler(object):
if anon is None and not aws_access_key_id and not aws_secret_access_key:
kw['anon'] = True
self._signer = None
try:
self.conn = _S3Connection(aws_access_key_id, aws_secret_access_key, **kw)
except Exception as ex:
raise NotConfigured(str(ex))
import botocore.auth
import botocore.credentials
except ImportError:
if six.PY3:
raise NotConfigured("missing botocore library")
_S3Connection = get_s3_connection()
if _S3Connection is None:
raise NotConfigured("missing botocore or boto library")
try:
self.conn = _S3Connection(
aws_access_key_id, aws_secret_access_key, **kw)
except Exception as ex:
raise NotConfigured(str(ex))
else:
SignerCls = botocore.auth.AUTH_TYPE_MAPS['s3']
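# TODO - honor the anon (anonymous access) option; the botocore signer below is always built from the credentials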
self._signer = SignerCls(botocore.credentials.Credentials(
aws_access_key_id, aws_secret_access_key))
self._download_http = httpdownloadhandler(settings).download_request
def download_request(self, request, spider):
......@@ -65,12 +80,28 @@ class S3DownloadHandler(object):
bucket = p.hostname
path = p.path + '?' + p.query if p.query else p.path
url = '%s://%s.s3.amazonaws.com%s' % (scheme, bucket, path)
signed_headers = self.conn.make_request(
if self._signer is not None:
import botocore.awsrequest
from botocore.vendored.requests.structures import CaseInsensitiveDict
print(url, request.headers)
awsrequest = botocore.awsrequest.AWSRequest(
method=request.method,
bucket=bucket,
key=unquote(p.path),
query_args=unquote(p.query),
headers=request.headers,
url='%s://s3.amazonaws.com/%s%s' % (scheme, bucket, path),
# TODO - move to a header method
headers=CaseInsensitiveDict(
(to_unicode(key), to_unicode(b','.join(value)))
for key, value in request.headers.items()),
data=request.body)
httpreq = request.replace(url=url, headers=signed_headers)
return self._download_http(httpreq, spider)
self._signer.add_auth(awsrequest)
request = request.replace(
url=url, headers=awsrequest.headers.items())
else:
signed_headers = self.conn.make_request(
method=request.method,
bucket=bucket,
key=unquote(p.path),
query_args=unquote(p.query),
headers=request.headers,
data=request.body)
request = request.replace(url=url, headers=signed_headers)
return self._download_http(request, spider)
......@@ -432,13 +432,20 @@ class HttpDownloadHandlerMock(object):
return request
class S3AnonTestCase(unittest.TestCase):
try:
import boto
except ImportError:
skip = 'missing boto library'
class BaseS3TestCase(unittest.TestCase):
if six.PY3:
skip = 'S3 not supported on Py3'
try:
import botocore
except ImportError:
skip = 'missing botocore library'
else:
try:
import boto
except ImportError:
skip = 'missing boto library'
class S3AnonTestCase(BaseS3TestCase):
def setUp(self):
self.s3reqh = S3DownloadHandler(Settings(),
......@@ -457,12 +464,6 @@ class S3AnonTestCase(unittest.TestCase):
class S3TestCase(unittest.TestCase):
download_handler_cls = S3DownloadHandler
try:
import boto
except ImportError:
skip = 'missing boto library'
if six.PY3:
skip = 'S3 not supported on Py3'
# tests use the same example keys as the Amazon developer guide
# http://s3.amazonaws.com/awsdocs/S3/20060301/s3-dg-20060301.pdf
......@@ -484,7 +485,7 @@ class S3TestCase(unittest.TestCase):
headers={'Date': 'Tue, 27 Mar 2007 19:36:42 +0000'})
httpreq = self.download_request(req, self.spider)
self.assertEqual(httpreq.headers['Authorization'], \
'AWS 0PN5J17HBGZHT7JJ3X82:xXjDGYUmKxnwqr5KXNPGldn5LbA=')
b'AWS 0PN5J17HBGZHT7JJ3X82:xXjDGYUmKxnwqr5KXNPGldn5LbA=')
def test_request_signing2(self):
# puts an object into the johnsmith bucket.
......@@ -495,7 +496,7 @@ class S3TestCase(unittest.TestCase):
})
httpreq = self.download_request(req, self.spider)
self.assertEqual(httpreq.headers['Authorization'], \
'AWS 0PN5J17HBGZHT7JJ3X82:hcicpDDvL9SsO6AkvxqmIWkmOuQ=')
b'AWS 0PN5J17HBGZHT7JJ3X82:hcicpDDvL9SsO6AkvxqmIWkmOuQ=')
def test_request_signing3(self):
# lists the content of the johnsmith bucket.
......@@ -506,7 +507,7 @@ class S3TestCase(unittest.TestCase):
})
httpreq = self.download_request(req, self.spider)
self.assertEqual(httpreq.headers['Authorization'], \
'AWS 0PN5J17HBGZHT7JJ3X82:jsRt/rhG+Vtp88HrYL706QhE4w4=')
b'AWS 0PN5J17HBGZHT7JJ3X82:jsRt/rhG+Vtp88HrYL706QhE4w4=')
def test_request_signing4(self):
# fetches the access control policy sub-resource for the 'johnsmith' bucket.
......@@ -514,7 +515,7 @@ class S3TestCase(unittest.TestCase):
method='GET', headers={'Date': 'Tue, 27 Mar 2007 19:44:46 +0000'})
httpreq = self.download_request(req, self.spider)
self.assertEqual(httpreq.headers['Authorization'], \
'AWS 0PN5J17HBGZHT7JJ3X82:thdUi9VAkzhkniLj96JIrOPGi0g=')
b'AWS 0PN5J17HBGZHT7JJ3X82:thdUi9VAkzhkniLj96JIrOPGi0g=')
def test_request_signing5(self):
# deletes an object from the 'johnsmith' bucket using the
......@@ -526,7 +527,7 @@ class S3TestCase(unittest.TestCase):
})
httpreq = self.download_request(req, self.spider)
self.assertEqual(httpreq.headers['Authorization'], \
'AWS 0PN5J17HBGZHT7JJ3X82:k3nL7gH3+PadhTEVn5Ip83xlYzk=')
b'AWS 0PN5J17HBGZHT7JJ3X82:k3nL7gH3+PadhTEVn5Ip83xlYzk=')
def test_request_signing6(self):
# uploads an object to a CNAME style virtual hosted bucket with metadata.
......@@ -547,7 +548,7 @@ class S3TestCase(unittest.TestCase):
})
httpreq = self.download_request(req, self.spider)
self.assertEqual(httpreq.headers['Authorization'], \
'AWS 0PN5J17HBGZHT7JJ3X82:C0FlOtU8Ylb9KDTpZqYkZPX91iI=')
b'AWS 0PN5J17HBGZHT7JJ3X82:C0FlOtU8Ylb9KDTpZqYkZPX91iI=')
def test_request_signing7(self):
# ensure that spaces are quoted properly before signing
......@@ -561,7 +562,7 @@ class S3TestCase(unittest.TestCase):
httpreq = self.download_request(req, self.spider)
self.assertEqual(
httpreq.headers['Authorization'],
'AWS 0PN5J17HBGZHT7JJ3X82:+CfvG8EZ3YccOrRVMXNaK2eKZmM=')
b'AWS 0PN5J17HBGZHT7JJ3X82:+CfvG8EZ3YccOrRVMXNaK2eKZmM=')
class FTPTestCase(unittest.TestCase):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册