提交 e2bd1be9 编写于 作者: P Pablo Hoffman

better aws code arrangement

--HG--
rename : scrapy/tests/test_aws.py => scrapy/tests/test_utils_aws.py
上级 827aa19c
"""
A downloader middleware for signing AWS requests just before they get into the
downloader. It is important to sign as close to the downloader as possible
because Amazon Web Services uses timestamps for authentication.
"""
import os
import re
import time
import hmac
import base64
import hashlib
from urlparse import urlsplit
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.aws import sign_request
from scrapy.conf import settings
METADATA_PREFIX = 'x-amz-meta-'
AMAZON_HEADER_PREFIX = 'x-amz-'

# Headers that always take part in the signature, besides the x-amz-* ones.
_SIGNED_HEADERS = frozenset(['content-md5', 'content-type', 'date'])
# Sub-resources that are kept in the canonical resource. Only the first one
# found (in this order) is appended.
_SIGNED_SUBRESOURCES = ('acl', 'logging', 'torrent', 'location')


def canonical_string(method, path, headers, expires=None):
    """Generate the AWS canonical string for the given parameters.

    The result is the newline-joined sequence: ``method``, the values of the
    ``content-md5``, ``content-type`` and ``date`` headers, one ``name:value``
    line per ``x-amz-*`` header (lowercased names, sorted), and finally the
    resource path.

    method  -- HTTP verb, placed verbatim on the first line.
    path    -- resource path, optionally followed by ``?query``. The query is
               dropped, except for a single ``acl``, ``logging``, ``torrent``
               or ``location`` sub-resource.
    headers -- mapping of header names to string values; names are matched
               case-insensitively and values are stripped of whitespace.
    expires -- if truthy, its ``str()`` replaces the date line (query string
               authentication).

    NOTE(review): when there is no ``Date`` header, no ``x-amz-date`` header
    and no ``expires``, the date line is left out entirely rather than being
    emitted as an empty line -- confirm callers always provide one of them.
    """
    interesting_headers = {}
    for key in headers:
        lk = key.lower()
        if lk in _SIGNED_HEADERS or lk.startswith(AMAZON_HEADER_PREFIX):
            interesting_headers[lk] = headers[key].strip()

    # these keys get empty strings if they don't exist
    interesting_headers.setdefault('content-type', '')
    interesting_headers.setdefault('content-md5', '')

    # x-amz-date is signed on its own line, so the date line is blanked
    if 'x-amz-date' in interesting_headers:
        interesting_headers['date'] = ''

    # if you're using expires for query string auth, then it trumps date
    # (and x-amz-date)
    if expires:
        interesting_headers['date'] = str(expires)

    lines = [method]
    # sorted() works on any mapping; dict.keys() has no .sort() on Python 3
    for key in sorted(interesting_headers):
        if key.startswith(AMAZON_HEADER_PREFIX):
            lines.append("%s:%s" % (key, interesting_headers[key]))
        else:
            lines.append("%s" % interesting_headers[key])

    # don't include anything after the first ? in the resource...
    lines.append(path.split('?')[0])
    buf = "\n".join(lines)

    # ...unless there is an acl, logging, torrent or location parameter
    for subresource in _SIGNED_SUBRESOURCES:
        if re.search("[&?]%s($|=|&)" % subresource, path):
            buf += "?%s" % subresource
            break

    return buf
def sign_request(req, accesskey, secretkey):
    """Sign ``req`` in place using AWS HMAC-SHA1 request authentication.

    A ``Date`` header with the current GMT time is added if the request does
    not carry one, and the ``Authorization`` header is set to
    ``AWS <accesskey>:<base64 signature>``.

    req       -- request object exposing ``url``, ``method`` and a mutable
                 ``headers`` mapping.
    accesskey -- AWS access key id, embedded in the Authorization header.
    secretkey -- AWS secret key, used as the HMAC key.
    """
    if 'Date' not in req.headers:
        req.headers['Date'] = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())

    parsed = urlsplit(req.url)
    hostname = parsed.hostname
    # an empty path is requested as "/", so that is what must be signed
    path = parsed.path or '/'
    key = '%s?%s' % (path, parsed.query) if parsed.query else path

    endpoint = 's3.amazonaws.com'
    if hostname == endpoint:
        # path-style url: the bucket (if any) is already part of the path
        fqkey = key
    elif hostname.endswith('.' + endpoint):
        # virtual-hosted-style url: the bucket is the host name prefix
        bucket = hostname[:-len('.' + endpoint)]
        fqkey = '/%s%s' % (bucket, key)
    else:
        # any other host name is used as the bucket name as-is
        fqkey = '/%s%s' % (hostname, key)

    c_string = canonical_string(req.method, fqkey, req.headers)
    digest = hmac.new(secretkey, c_string, hashlib.sha1).digest()
    # b64encode adds no line breaks, unlike the deprecated encodestring
    b64_hmac = base64.b64encode(digest)
    req.headers['Authorization'] = "AWS %s:%s" % (accesskey, b64_hmac)
class AWSMiddleware(object):
    """Downloader middleware that signs requests addressed to Amazon S3.

    The credentials are read once, at construction time, from the
    ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` settings, falling back
    to the environment variables of the same name when a setting is empty.
    """

    def __init__(self):
        self.access_key = settings['AWS_ACCESS_KEY_ID'] or \
            os.environ.get('AWS_ACCESS_KEY_ID')
        self.secret_key = settings['AWS_SECRET_ACCESS_KEY'] or \
            os.environ.get('AWS_SECRET_ACCESS_KEY')

    def process_request(self, request, spider):
        """Stamp and sign ``request`` when it targets s3.amazonaws.com.

        A request qualifies if the spider's ``domain_name`` is
        ``s3.amazonaws.com`` or the request host name ends with it. Other
        requests are left untouched. Nothing is returned explicitly.
        """
        hostname = urlparse_cached(request).hostname
        if spider.domain_name == 's3.amazonaws.com' \
                or (hostname and hostname.endswith('s3.amazonaws.com')):
            # refresh the date right before signing: the signature covers it
            request.headers['Date'] = time.strftime("%a, %d %b %Y %H:%M:%S GMT", \
                    time.gmtime())
            sign_request(request, self.access_key, self.secret_key)
from unittest import TestCase, main
from scrapy.contrib import aws
from scrapy.utils import aws
from scrapy.http import Request
# Just some random keys. The keys are provided by the Amazon developer guide at
# http://s3.amazonaws.com/awsdocs/S3/20060301/s3-dg-20060301.pdf
# and the tests described here are the examples from that manual.
......@@ -103,7 +104,5 @@ class ScrapyAWSTest(TestCase):
'AWS 0PN5J17HBGZHT7JJ3X82:C0FlOtU8Ylb9KDTpZqYkZPX91iI=')
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    main()
"""Helper functions for working with Amazon Web Services"""
import re
import time
import hmac
import base64
import hashlib
from urlparse import urlsplit
AMAZON_HEADER_PREFIX = 'x-amz-'

# Headers that always take part in the signature, besides the x-amz-* ones.
_SIGNED_HEADERS = frozenset(['content-md5', 'content-type', 'date'])
# Sub-resources that are kept in the canonical resource. Only the first one
# found (in this order) is appended.
_SIGNED_SUBRESOURCES = ('acl', 'logging', 'torrent', 'location')


def canonical_string(method, path, headers, expires=None):
    """Generate the AWS canonical string for the given parameters.

    The result is the newline-joined sequence: ``method``, the values of the
    ``content-md5``, ``content-type`` and ``date`` headers, one ``name:value``
    line per ``x-amz-*`` header (lowercased names, sorted), and finally the
    resource path.

    method  -- HTTP verb, placed verbatim on the first line.
    path    -- resource path, optionally followed by ``?query``. The query is
               dropped, except for a single ``acl``, ``logging``, ``torrent``
               or ``location`` sub-resource.
    headers -- mapping of header names to string values; names are matched
               case-insensitively and values are stripped of whitespace.
    expires -- if truthy, its ``str()`` replaces the date line (query string
               authentication).

    NOTE(review): when there is no ``Date`` header, no ``x-amz-date`` header
    and no ``expires``, the date line is left out entirely rather than being
    emitted as an empty line -- confirm callers always provide one of them.
    """
    interesting_headers = {}
    for key in headers:
        lk = key.lower()
        if lk in _SIGNED_HEADERS or lk.startswith(AMAZON_HEADER_PREFIX):
            interesting_headers[lk] = headers[key].strip()

    # these keys get empty strings if they don't exist
    interesting_headers.setdefault('content-type', '')
    interesting_headers.setdefault('content-md5', '')

    # x-amz-date is signed on its own line, so the date line is blanked
    if 'x-amz-date' in interesting_headers:
        interesting_headers['date'] = ''

    # if you're using expires for query string auth, then it trumps date
    # (and x-amz-date)
    if expires:
        interesting_headers['date'] = str(expires)

    lines = [method]
    # sorted() works on any mapping; dict.keys() has no .sort() on Python 3
    for key in sorted(interesting_headers):
        if key.startswith(AMAZON_HEADER_PREFIX):
            lines.append("%s:%s" % (key, interesting_headers[key]))
        else:
            lines.append("%s" % interesting_headers[key])

    # don't include anything after the first ? in the resource...
    lines.append(path.split('?')[0])
    buf = "\n".join(lines)

    # ...unless there is an acl, logging, torrent or location parameter
    for subresource in _SIGNED_SUBRESOURCES:
        if re.search("[&?]%s($|=|&)" % subresource, path):
            buf += "?%s" % subresource
            break

    return buf
def sign_request(req, accesskey, secretkey):
    """Sign ``req`` in place using AWS HMAC-SHA1 request authentication.

    A ``Date`` header with the current GMT time is added if the request does
    not carry one, and the ``Authorization`` header is set to
    ``AWS <accesskey>:<base64 signature>``.

    req       -- request object exposing ``url``, ``method`` and a mutable
                 ``headers`` mapping.
    accesskey -- AWS access key id, embedded in the Authorization header.
    secretkey -- AWS secret key, used as the HMAC key.
    """
    if 'Date' not in req.headers:
        req.headers['Date'] = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())

    parsed = urlsplit(req.url)
    hostname = parsed.hostname
    # an empty path is requested as "/", so that is what must be signed
    path = parsed.path or '/'
    key = '%s?%s' % (path, parsed.query) if parsed.query else path

    endpoint = 's3.amazonaws.com'
    if hostname == endpoint:
        # path-style url: the bucket (if any) is already part of the path
        fqkey = key
    elif hostname.endswith('.' + endpoint):
        # virtual-hosted-style url: the bucket is the host name prefix
        bucket = hostname[:-len('.' + endpoint)]
        fqkey = '/%s%s' % (bucket, key)
    else:
        # any other host name is used as the bucket name as-is
        fqkey = '/%s%s' % (hostname, key)

    c_string = canonical_string(req.method, fqkey, req.headers)
    digest = hmac.new(secretkey, c_string, hashlib.sha1).digest()
    # b64encode adds no line breaks, unlike the deprecated encodestring
    b64_hmac = base64.b64encode(digest)
    req.headers['Authorization'] = "AWS %s:%s" % (accesskey, b64_hmac)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册