Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
looyolo
scrapy
提交
d61fbcc8
S
scrapy
项目概览
looyolo
/
scrapy
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
scrapy
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
d61fbcc8
编写于
2月 18, 2016
作者:
K
Konstantin Lopuhin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Support headers in S3FilesStore.persist_file for botocore
上级
617631f2
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
53 additions
and
5 deletions
+53
-5
scrapy/pipelines/files.py
scrapy/pipelines/files.py
+35
-1
scrapy/utils/test.py
scrapy/utils/test.py
+2
-2
tests/test_pipeline_files.py
tests/test_pipeline_files.py
+16
-2
未找到文件。
scrapy/pipelines/files.py
浏览文件 @
d61fbcc8
...
...
@@ -29,6 +29,7 @@ from scrapy.utils.log import failure_to_exc_info
from
scrapy.utils.python
import
to_bytes
from
scrapy.utils.request
import
referer_str
from
scrapy.utils.boto
import
is_botocore
from
scrapy.utils.datatypes
import
CaselessDict
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -137,13 +138,17 @@ class S3FilesStore(object):
key_name
=
'%s%s'
%
(
self
.
prefix
,
path
)
buf
.
seek
(
0
)
if
self
.
is_botocore
:
extra
=
self
.
_headers_to_botocore_kwargs
(
self
.
HEADERS
)
if
headers
:
extra
.
update
(
self
.
_headers_to_botocore_kwargs
(
headers
))
return
threads
.
deferToThread
(
self
.
s3_client
.
put_object
,
Bucket
=
self
.
bucket
,
Key
=
key_name
,
Body
=
buf
,
Metadata
=
{
k
:
str
(
v
)
for
k
,
v
in
six
.
iteritems
(
meta
)},
ACL
=
self
.
POLICY
)
ACL
=
self
.
POLICY
,
**
extra
)
else
:
b
=
self
.
_get_boto_bucket
()
k
=
b
.
new_key
(
key_name
)
...
...
@@ -157,6 +162,35 @@ class S3FilesStore(object):
k
.
set_contents_from_string
,
buf
.
getvalue
(),
headers
=
h
,
policy
=
self
.
POLICY
)
def _headers_to_botocore_kwargs(self, headers):
    """Convert HTTP-style headers to botocore keyword arguments.

    Parameters
    ----------
    headers : dict
        Mapping of HTTP header names (case-insensitive) to values,
        e.g. ``{'Content-Type': 'image/png'}``.

    Returns
    -------
    dict
        Keyword arguments suitable for ``botocore`` S3 ``put_object``
        calls, e.g. ``{'ContentType': 'image/png'}``.

    Raises
    ------
    TypeError
        If a header has no corresponding botocore keyword argument.
    """
    # This is required while we need to support both boto and botocore:
    # boto accepts raw HTTP headers, botocore wants named kwargs.
    # CaselessDict makes the lookup case-insensitive, matching HTTP
    # header semantics.
    mapping = CaselessDict({
        'Content-Type': 'ContentType',
        'Cache-Control': 'CacheControl',
        'Content-Disposition': 'ContentDisposition',
        'Content-Encoding': 'ContentEncoding',
        'Content-Language': 'ContentLanguage',
        'Content-Length': 'ContentLength',
        'Content-MD5': 'ContentMD5',
        'Expires': 'Expires',
        'X-Amz-Grant-Full-Control': 'GrantFullControl',
        'X-Amz-Grant-Read': 'GrantRead',
        'X-Amz-Grant-Read-ACP': 'GrantReadACP',
        'X-Amz-Grant-Write-ACP': 'GrantWriteACP',
    })
    extra = {}
    for key, value in six.iteritems(headers):
        try:
            kwarg = mapping[key]
        except KeyError:
            # Fail loudly rather than silently dropping a header the
            # caller expected to take effect on the stored object.
            raise TypeError(
                'Header "%s" is not supported by botocore' % key)
        else:
            extra[kwarg] = value
    return extra
class
FilesPipeline
(
MediaPipeline
):
"""Abstract pipeline that implement the file downloading
...
...
scrapy/utils/test.py
浏览文件 @
d61fbcc8
...
...
@@ -26,7 +26,7 @@ def skip_if_no_boto():
except
NotConfigured
as
e
:
raise
SkipTest
(
e
.
message
)
def
get_s3_content_and_delete
(
bucket
,
path
):
def
get_s3_content_and_delete
(
bucket
,
path
,
with_key
=
False
):
""" Get content from s3 key, and delete key afterwards.
"""
if
is_botocore
():
...
...
@@ -43,7 +43,7 @@ def get_s3_content_and_delete(bucket, path):
key
=
bucket
.
get_key
(
path
)
content
=
key
.
get_contents_as_string
()
bucket
.
delete_key
(
path
)
return
content
return
(
content
,
key
)
if
with_key
else
content
def
get_crawler
(
spidercls
=
None
,
settings_dict
=
None
):
"""Return an unconfigured Crawler object. If settings_dict is given, it
...
...
tests/test_pipeline_files.py
浏览文件 @
d61fbcc8
...
...
@@ -16,6 +16,7 @@ from scrapy.http import Request, Response
from
scrapy.settings
import
Settings
from
scrapy.utils.python
import
to_bytes
from
scrapy.utils.test
import
assert_aws_environ
,
get_s3_content_and_delete
from
scrapy.utils.boto
import
is_botocore
from
tests
import
mock
...
...
@@ -194,14 +195,27 @@ class TestS3FilesStore(unittest.TestCase):
meta
=
{
'foo'
:
'bar'
}
path
=
''
store
=
S3FilesStore
(
uri
)
yield
store
.
persist_file
(
path
,
buf
,
info
=
None
,
meta
=
meta
)
yield
store
.
persist_file
(
path
,
buf
,
info
=
None
,
meta
=
meta
,
headers
=
{
'Content-Type'
:
'image/png'
})
s
=
yield
store
.
stat_file
(
path
,
info
=
None
)
self
.
assertIn
(
'last_modified'
,
s
)
self
.
assertIn
(
'checksum'
,
s
)
self
.
assertEqual
(
s
[
'checksum'
],
'3187896a9657a28163abb31667df64c8'
)
u
=
urlparse
(
uri
)
content
=
get_s3_content_and_delete
(
u
.
hostname
,
u
.
path
[
1
:])
content
,
key
=
get_s3_content_and_delete
(
u
.
hostname
,
u
.
path
[
1
:],
with_key
=
True
)
self
.
assertEqual
(
content
,
data
)
if
is_botocore
():
self
.
assertEqual
(
key
[
'Metadata'
],
{
'foo'
:
'bar'
})
self
.
assertEqual
(
key
[
'CacheControl'
],
S3FilesStore
.
HEADERS
[
'Cache-Control'
])
self
.
assertEqual
(
key
[
'ContentType'
],
'image/png'
)
else
:
self
.
assertEqual
(
key
.
metadata
,
{
'foo'
:
'bar'
})
self
.
assertEqual
(
key
.
cache_control
,
S3FilesStore
.
HEADERS
[
'Cache-Control'
])
self
.
assertEqual
(
key
.
content_type
,
'image/png'
)
class
ItemWithFiles
(
Item
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录