Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
looyolo
scrapy
提交
3ada45a9
S
scrapy
项目概览
looyolo
/
scrapy
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
scrapy
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
3ada45a9
编写于
2月 15, 2016
作者:
K
Konstantin Lopuhin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
S3FeedStorageTest: add botocore support, and organize boto/botocore checks
上级
5d2f0674
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
77 additions
and
46 deletions
+77
-46
scrapy/core/downloader/handlers/s3.py
scrapy/core/downloader/handlers/s3.py
+11
-20
scrapy/extensions/feedexport.py
scrapy/extensions/feedexport.py
+21
-11
scrapy/utils/boto.py
scrapy/utils/boto.py
+20
-0
scrapy/utils/test.py
scrapy/utils/test.py
+6
-10
tests/test_feedexport.py
tests/test_feedexport.py
+19
-5
未找到文件。
scrapy/core/downloader/handlers/s3.py
浏览文件 @
3ada45a9
import
six
from
six.moves.urllib.parse
import
unquote
from
scrapy.exceptions
import
NotConfigured
from
scrapy.utils.httpobj
import
urlparse_cached
from
scrapy.utils.
python
import
to_unicod
e
from
scrapy.utils.
boto
import
is_botocor
e
from
.http
import
HTTPDownloadHandler
def
get_s3_connection
():
try
:
from
boto.s3.connection
import
S3Connection
except
ImportError
:
return
None
def
_get_boto_connection
():
from
boto.s3.connection
import
S3Connection
class
_v19_S3Connection
(
S3Connection
):
"""A dummy S3Connection wrapper that doesn't do any synchronous download"""
...
...
@@ -53,21 +49,9 @@ class S3DownloadHandler(object):
self
.
anon
=
kw
.
get
(
'anon'
)
self
.
_signer
=
None
try
:
if
is_botocore
()
:
import
botocore.auth
import
botocore.credentials
except
ImportError
:
if
six
.
PY3
:
raise
NotConfigured
(
"missing botocore library"
)
_S3Connection
=
get_s3_connection
()
if
_S3Connection
is
None
:
raise
NotConfigured
(
"missing botocore or boto library"
)
try
:
self
.
conn
=
_S3Connection
(
aws_access_key_id
,
aws_secret_access_key
,
**
kw
)
except
Exception
as
ex
:
raise
NotConfigured
(
str
(
ex
))
else
:
kw
.
pop
(
'anon'
,
None
)
if
kw
:
raise
TypeError
(
'Unexpected keyword arguments: %s'
%
kw
)
...
...
@@ -75,6 +59,13 @@ class S3DownloadHandler(object):
SignerCls
=
botocore
.
auth
.
AUTH_TYPE_MAPS
[
's3'
]
self
.
_signer
=
SignerCls
(
botocore
.
credentials
.
Credentials
(
aws_access_key_id
,
aws_secret_access_key
))
else
:
_S3Connection
=
_get_boto_connection
()
try
:
self
.
conn
=
_S3Connection
(
aws_access_key_id
,
aws_secret_access_key
,
**
kw
)
except
Exception
as
ex
:
raise
NotConfigured
(
str
(
ex
))
self
.
_download_http
=
httpdownloadhandler
(
settings
).
download_request
...
...
scrapy/extensions/feedexport.py
浏览文件 @
3ada45a9
...
...
@@ -24,6 +24,7 @@ from scrapy.exceptions import NotConfigured
from
scrapy.utils.misc
import
load_object
from
scrapy.utils.log
import
failure_to_exc_info
from
scrapy.utils.python
import
without_none_values
from
scrapy.utils.boto
import
is_botocore
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -90,24 +91,33 @@ class S3FeedStorage(BlockingFeedStorage):
def
__init__
(
self
,
uri
):
from
scrapy.conf
import
settings
try
:
import
boto
except
ImportError
:
raise
NotConfigured
self
.
connect_s3
=
boto
.
connect_s3
u
=
urlparse
(
uri
)
self
.
bucketname
=
u
.
hostname
self
.
access_key
=
u
.
username
or
settings
[
'AWS_ACCESS_KEY_ID'
]
self
.
secret_key
=
u
.
password
or
settings
[
'AWS_SECRET_ACCESS_KEY'
]
self
.
keyname
=
u
.
path
self
.
is_botocore
=
is_botocore
()
self
.
keyname
=
u
.
path
[
1
:]
# remove first "/"
if
self
.
is_botocore
:
import
botocore.session
session
=
botocore
.
session
.
get_session
()
self
.
s3_client
=
session
.
create_client
(
's3'
,
aws_access_key_id
=
self
.
access_key
,
aws_secret_access_key
=
self
.
secret_key
)
else
:
import
boto
self
.
connect_s3
=
boto
.
connect_s3
def
_store_in_thread
(
self
,
file
):
file
.
seek
(
0
)
conn
=
self
.
connect_s3
(
self
.
access_key
,
self
.
secret_key
)
bucket
=
conn
.
get_bucket
(
self
.
bucketname
,
validate
=
False
)
key
=
bucket
.
new_key
(
self
.
keyname
)
key
.
set_contents_from_file
(
file
)
key
.
close
()
if
self
.
is_botocore
:
self
.
s3_client
.
put_object
(
Bucket
=
self
.
bucketname
,
Key
=
self
.
keyname
,
Body
=
file
)
else
:
conn
=
self
.
connect_s3
(
self
.
access_key
,
self
.
secret_key
)
bucket
=
conn
.
get_bucket
(
self
.
bucketname
,
validate
=
False
)
key
=
bucket
.
new_key
(
self
.
keyname
)
key
.
set_contents_from_file
(
file
)
key
.
close
()
class
FTPFeedStorage
(
BlockingFeedStorage
):
...
...
scrapy/utils/boto.py
0 → 100644
浏览文件 @
3ada45a9
"""Boto/botocore helpers"""
import
six
from
scrapy.exceptions
import
NotConfigured
def
is_botocore
():
try
:
import
botocore
return
True
except
ImportError
:
if
six
.
PY2
:
try
:
import
boto
return
False
except
ImportError
:
raise
NotConfigured
(
'missing botocore or boto library'
)
else
:
raise
NotConfigured
(
'missing botocore library'
)
scrapy/utils/test.py
浏览文件 @
3ada45a9
...
...
@@ -5,9 +5,11 @@ This module contains some assorted functions used in tests
import
os
from
importlib
import
import_module
import
six
from
twisted.trial.unittest
import
SkipTest
from
scrapy.exceptions
import
NotConfigured
from
scrapy.utils.boto
import
is_botocore
def
assert_aws_environ
():
"""Asserts the current environment is suitable for running AWS testsi.
...
...
@@ -19,15 +21,9 @@ def assert_aws_environ():
def
skip_if_no_boto
():
try
:
import
botocore
except
ImportError
:
if
six
.
PY2
:
try
:
import
boto
except
ImportError
:
raise
SkipTest
(
'missing botocore or boto library'
)
else
:
raise
SkipTest
(
'missing botocore library'
)
is_botocore
()
except
NotConfigured
as
e
:
raise
SkipTest
(
e
.
message
)
def
get_crawler
(
spidercls
=
None
,
settings_dict
=
None
):
"""Return an unconfigured Crawler object. If settings_dict is given, it
...
...
tests/test_feedexport.py
浏览文件 @
3ada45a9
...
...
@@ -22,6 +22,7 @@ from scrapy.extensions.feedexport import (
)
from
scrapy.utils.test
import
assert_aws_environ
from
scrapy.utils.python
import
to_native_str
from
scrapy.utils.boto
import
is_botocore
class
FileFeedStorageTest
(
unittest
.
TestCase
):
...
...
@@ -95,17 +96,30 @@ class S3FeedStorageTest(unittest.TestCase):
uri
=
os
.
environ
.
get
(
'FEEDTEST_S3_URI'
)
if
not
uri
:
raise
unittest
.
SkipTest
(
"No S3 URI available for testing"
)
from
boto
import
connect_s3
storage
=
S3FeedStorage
(
uri
)
verifyObject
(
IFeedStorage
,
storage
)
file
=
storage
.
open
(
scrapy
.
Spider
(
"default"
))
file
.
write
(
"content"
)
yield
storage
.
store
(
file
)
u
=
urlparse
(
uri
)
bucket
=
connect_s3
().
get_bucket
(
u
.
hostname
,
validate
=
False
)
key
=
bucket
.
get_key
(
u
.
path
)
self
.
assertEqual
(
key
.
get_contents_as_string
(),
"content"
)
bucket
.
delete_key
(
u
.
path
)
content
=
self
.
_get_content_and_delete
(
u
.
hostname
,
u
.
path
[
1
:])
self
.
assertEqual
(
content
,
"content"
)
def
_get_content_and_delete
(
self
,
bucket
,
path
):
if
is_botocore
():
import
botocore.session
session
=
botocore
.
session
.
get_session
()
client
=
session
.
create_client
(
's3'
)
key
=
client
.
get_object
(
Bucket
=
bucket
,
Key
=
path
)
content
=
key
[
'Body'
].
read
()
client
.
delete_object
(
Bucket
=
bucket
,
Key
=
path
)
else
:
from
boto
import
connect_s3
bucket
=
connect_s3
().
get_bucket
(
bucket
,
validate
=
False
)
key
=
bucket
.
get_key
(
path
)
content
=
key
.
get_contents_as_string
()
bucket
.
delete_key
(
path
)
return
content
class
StdoutFeedStorageTest
(
unittest
.
TestCase
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录