Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
looyolo
scrapy
提交
eaf3a239
S
scrapy
项目概览
looyolo
/
scrapy
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
scrapy
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
eaf3a239
编写于
2月 05, 2016
作者:
K
Konstantin Lopuhin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
using botocore for s3 request signing: proof of concept
上级
41588397
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
65 addition
and
33 deletion
+65
-33
scrapy/core/downloader/handlers/s3.py
scrapy/core/downloader/handlers/s3.py
+45
-14
tests/test_downloader_handlers.py
tests/test_downloader_handlers.py
+20
-19
未找到文件。
scrapy/core/downloader/handlers/s3.py
浏览文件 @
eaf3a239
import
six
from
six.moves.urllib.parse
import
unquote
from
scrapy.exceptions
import
NotConfigured
from
scrapy.utils.httpobj
import
urlparse_cached
from
scrapy.utils.python
import
to_unicode
from
.http
import
HTTPDownloadHandler
...
...
@@ -37,10 +39,6 @@ class S3DownloadHandler(object):
def
__init__
(
self
,
settings
,
aws_access_key_id
=
None
,
aws_secret_access_key
=
None
,
\
httpdownloadhandler
=
HTTPDownloadHandler
,
**
kw
):
_S3Connection
=
get_s3_connection
()
if
_S3Connection
is
None
:
raise
NotConfigured
(
"missing boto library"
)
if
not
aws_access_key_id
:
aws_access_key_id
=
settings
[
'AWS_ACCESS_KEY_ID'
]
if
not
aws_secret_access_key
:
...
...
@@ -53,10 +51,27 @@ class S3DownloadHandler(object):
if
anon
is
None
and
not
aws_access_key_id
and
not
aws_secret_access_key
:
kw
[
'anon'
]
=
True
self
.
_signer
=
None
try
:
self
.
conn
=
_S3Connection
(
aws_access_key_id
,
aws_secret_access_key
,
**
kw
)
except
Exception
as
ex
:
raise
NotConfigured
(
str
(
ex
))
import
botocore.auth
import
botocore.credentials
except
ImportError
:
if
six
.
PY3
:
raise
NotConfigured
(
"missing botocore library"
)
_S3Connection
=
get_s3_connection
()
if
_S3Connection
is
None
:
raise
NotConfigured
(
"missing botocore or boto library"
)
try
:
self
.
conn
=
_S3Connection
(
aws_access_key_id
,
aws_secret_access_key
,
**
kw
)
except
Exception
as
ex
:
raise
NotConfigured
(
str
(
ex
))
else
:
SignerCls
=
botocore
.
auth
.
AUTH_TYPE_MAPS
[
's3'
]
# TODO - anon
self
.
_signer
=
SignerCls
(
botocore
.
credentials
.
Credentials
(
aws_access_key_id
,
aws_secret_access_key
))
self
.
_download_http
=
httpdownloadhandler
(
settings
).
download_request
def
download_request
(
self
,
request
,
spider
):
...
...
@@ -65,12 +80,28 @@ class S3DownloadHandler(object):
bucket
=
p
.
hostname
path
=
p
.
path
+
'?'
+
p
.
query
if
p
.
query
else
p
.
path
url
=
'%s://%s.s3.amazonaws.com%s'
%
(
scheme
,
bucket
,
path
)
signed_headers
=
self
.
conn
.
make_request
(
if
self
.
_signer
is
not
None
:
import
botocore.awsrequest
from
botocore.vendored.requests.structures
import
CaseInsensitiveDict
print
(
url
,
request
.
headers
)
awsrequest
=
botocore
.
awsrequest
.
AWSRequest
(
method
=
request
.
method
,
bucket
=
bucket
,
key
=
unquote
(
p
.
path
),
query_args
=
unquote
(
p
.
query
),
headers
=
request
.
headers
,
url
=
'%s://s3.amazonaws.com/%s%s'
%
(
scheme
,
bucket
,
path
),
# TODO - move to a header method
headers
=
CaseInsensitiveDict
(
(
to_unicode
(
key
),
to_unicode
(
b
','
.
join
(
value
)))
for
key
,
value
in
request
.
headers
.
items
()),
data
=
request
.
body
)
httpreq
=
request
.
replace
(
url
=
url
,
headers
=
signed_headers
)
return
self
.
_download_http
(
httpreq
,
spider
)
self
.
_signer
.
add_auth
(
awsrequest
)
request
=
request
.
replace
(
url
=
url
,
headers
=
awsrequest
.
headers
.
items
())
else
:
signed_headers
=
self
.
conn
.
make_request
(
method
=
request
.
method
,
bucket
=
bucket
,
key
=
unquote
(
p
.
path
),
query_args
=
unquote
(
p
.
query
),
headers
=
request
.
headers
,
data
=
request
.
body
)
request
=
request
.
replace
(
url
=
url
,
headers
=
signed_headers
)
return
self
.
_download_http
(
request
,
spider
)
tests/test_downloader_handlers.py
浏览文件 @
eaf3a239
...
...
@@ -432,13 +432,20 @@ class HttpDownloadHandlerMock(object):
return
request
class
S3AnonTestCase
(
unittest
.
TestCase
):
try
:
import
boto
except
ImportError
:
skip
=
'missing boto library'
class
BaseS3TestCase
(
unittest
.
TestCase
):
if
six
.
PY3
:
skip
=
'S3 not supported on Py3'
try
:
import
botocore
except
ImportError
:
skip
=
'missing botocore library'
else
:
try
:
import
boto
except
ImportError
:
skip
=
'missing boto library'
class
S3AnonTestCase
(
BaseS3TestCase
):
def
setUp
(
self
):
self
.
s3reqh
=
S3DownloadHandler
(
Settings
(),
...
...
@@ -457,12 +464,6 @@ class S3AnonTestCase(unittest.TestCase):
class
S3TestCase
(
unittest
.
TestCase
):
download_handler_cls
=
S3DownloadHandler
try
:
import
boto
except
ImportError
:
skip
=
'missing boto library'
if
six
.
PY3
:
skip
=
'S3 not supported on Py3'
# test use same example keys than amazon developer guide
# http://s3.amazonaws.com/awsdocs/S3/20060301/s3-dg-20060301.pdf
...
...
@@ -484,7 +485,7 @@ class S3TestCase(unittest.TestCase):
headers
=
{
'Date'
:
'Tue, 27 Mar 2007 19:36:42 +0000'
})
httpreq
=
self
.
download_request
(
req
,
self
.
spider
)
self
.
assertEqual
(
httpreq
.
headers
[
'Authorization'
],
\
'AWS 0PN5J17HBGZHT7JJ3X82:xXjDGYUmKxnwqr5KXNPGldn5LbA='
)
b
'AWS 0PN5J17HBGZHT7JJ3X82:xXjDGYUmKxnwqr5KXNPGldn5LbA='
)
def
test_request_signing2
(
self
):
# puts an object into the johnsmith bucket.
...
...
@@ -495,7 +496,7 @@ class S3TestCase(unittest.TestCase):
})
httpreq
=
self
.
download_request
(
req
,
self
.
spider
)
self
.
assertEqual
(
httpreq
.
headers
[
'Authorization'
],
\
'AWS 0PN5J17HBGZHT7JJ3X82:hcicpDDvL9SsO6AkvxqmIWkmOuQ='
)
b
'AWS 0PN5J17HBGZHT7JJ3X82:hcicpDDvL9SsO6AkvxqmIWkmOuQ='
)
def
test_request_signing3
(
self
):
# lists the content of the johnsmith bucket.
...
...
@@ -506,7 +507,7 @@ class S3TestCase(unittest.TestCase):
})
httpreq
=
self
.
download_request
(
req
,
self
.
spider
)
self
.
assertEqual
(
httpreq
.
headers
[
'Authorization'
],
\
'AWS 0PN5J17HBGZHT7JJ3X82:jsRt/rhG+Vtp88HrYL706QhE4w4='
)
b
'AWS 0PN5J17HBGZHT7JJ3X82:jsRt/rhG+Vtp88HrYL706QhE4w4='
)
def
test_request_signing4
(
self
):
# fetches the access control policy sub-resource for the 'johnsmith' bucket.
...
...
@@ -514,7 +515,7 @@ class S3TestCase(unittest.TestCase):
method
=
'GET'
,
headers
=
{
'Date'
:
'Tue, 27 Mar 2007 19:44:46 +0000'
})
httpreq
=
self
.
download_request
(
req
,
self
.
spider
)
self
.
assertEqual
(
httpreq
.
headers
[
'Authorization'
],
\
'AWS 0PN5J17HBGZHT7JJ3X82:thdUi9VAkzhkniLj96JIrOPGi0g='
)
b
'AWS 0PN5J17HBGZHT7JJ3X82:thdUi9VAkzhkniLj96JIrOPGi0g='
)
def
test_request_signing5
(
self
):
# deletes an object from the 'johnsmith' bucket using the
...
...
@@ -526,7 +527,7 @@ class S3TestCase(unittest.TestCase):
})
httpreq
=
self
.
download_request
(
req
,
self
.
spider
)
self
.
assertEqual
(
httpreq
.
headers
[
'Authorization'
],
\
'AWS 0PN5J17HBGZHT7JJ3X82:k3nL7gH3+PadhTEVn5Ip83xlYzk='
)
b
'AWS 0PN5J17HBGZHT7JJ3X82:k3nL7gH3+PadhTEVn5Ip83xlYzk='
)
def
test_request_signing6
(
self
):
# uploads an object to a CNAME style virtual hosted bucket with metadata.
...
...
@@ -547,7 +548,7 @@ class S3TestCase(unittest.TestCase):
})
httpreq
=
self
.
download_request
(
req
,
self
.
spider
)
self
.
assertEqual
(
httpreq
.
headers
[
'Authorization'
],
\
'AWS 0PN5J17HBGZHT7JJ3X82:C0FlOtU8Ylb9KDTpZqYkZPX91iI='
)
b
'AWS 0PN5J17HBGZHT7JJ3X82:C0FlOtU8Ylb9KDTpZqYkZPX91iI='
)
def
test_request_signing7
(
self
):
# ensure that spaces are quoted properly before signing
...
...
@@ -561,7 +562,7 @@ class S3TestCase(unittest.TestCase):
httpreq
=
self
.
download_request
(
req
,
self
.
spider
)
self
.
assertEqual
(
httpreq
.
headers
[
'Authorization'
],
'AWS 0PN5J17HBGZHT7JJ3X82:+CfvG8EZ3YccOrRVMXNaK2eKZmM='
)
b
'AWS 0PN5J17HBGZHT7JJ3X82:+CfvG8EZ3YccOrRVMXNaK2eKZmM='
)
class
FTPTestCase
(
unittest
.
TestCase
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录