Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
looyolo
scrapy
提交
752787e6
S
scrapy
项目概览
looyolo
/
scrapy
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
scrapy
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
752787e6
编写于
3月 06, 2014
作者:
D
Daniel Graña
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add a LevelDB cache backend
上级
1c9effd7
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
71 additions
and
0 deletions
+71
-0
scrapy/contrib/httpcache.py
scrapy/contrib/httpcache.py
+63
-0
scrapy/tests/test_downloadermiddleware_httpcache.py
scrapy/tests/test_downloadermiddleware_httpcache.py
+7
-0
tox.ini
tox.ini
+1
-0
未找到文件。
scrapy/contrib/httpcache.py
浏览文件 @
752787e6
...
...
@@ -285,6 +285,69 @@ class FilesystemCacheStorage(object):
return
pickle
.
load
(
f
)
class LeveldbCacheStorage(object):
    """HTTP cache storage backed by one LevelDB database per spider.

    Each cached response is stored under two keys derived from the request
    fingerprint: ``<key>_data`` (the pickled response record) and
    ``<key>_time`` (the store timestamp, used for expiration checks).
    """

    def __init__(self, settings):
        # Imported lazily so the leveldb binding is only required when this
        # backend is actually selected.
        import leveldb
        self._leveldb = leveldb
        self.expiration_secs = settings.getint('HTTPCACHE_EXPIRATION_SECS')
        self.cachedir = data_path(settings['HTTPCACHE_DIR'], createdir=True)
        # Opened per spider in open_spider().
        self.db = None

    def open_spider(self, spider):
        """Open (creating if needed) the spider's LevelDB database."""
        location = os.path.join(self.cachedir, '%s.leveldb' % spider.name)
        self.db = self._leveldb.LevelDB(location)

    def close_spider(self, spider):
        # Drop the handle so the LevelDB file lock is released.
        del self.db

    def retrieve_response(self, spider, request):
        """Return the cached response for *request*, or None on a miss."""
        record = self._read_data(spider, request)
        if record is None:
            return  # not cached
        headers = Headers(record['headers'])
        cls = responsetypes.from_args(headers=headers, url=record['url'])
        return cls(url=record['url'], headers=headers,
                   status=record['status'], body=record['body'])

    def store_response(self, spider, request, response):
        """Persist *response* under the request's fingerprint key."""
        key = self._request_key(request)
        payload = pickle.dumps({
            'status': response.status,
            'url': response.url,
            'headers': dict(response.headers),
            'body': response.body,
        }, protocol=2)
        # Write data and timestamp atomically in a single batch.
        batch = self._leveldb.WriteBatch()
        batch.Put('%s_data' % key, payload)
        batch.Put('%s_time' % key, str(time()))
        self.db.Write(batch)

    def _read_data(self, spider, request):
        """Load the stored record for *request*; None if missing or stale."""
        key = self._request_key(request)
        try:
            ts = self.db.Get('%s_time' % key)
        except KeyError:
            return  # not found or invalid entry
        # expiration_secs == 0 means "never expire" (chained comparison
        # short-circuits before touching the timestamp).
        if 0 < self.expiration_secs < time() - float(ts):
            return  # expired
        try:
            raw = self.db.Get('%s_data' % key)
        except KeyError:
            return  # invalid entry
        return pickle.loads(raw)

    def _request_key(self, request):
        return request_fingerprint(request)
def
parse_cachecontrol
(
header
):
"""Parse Cache-Control header
...
...
scrapy/tests/test_downloadermiddleware_httpcache.py
浏览文件 @
752787e6
...
...
@@ -5,6 +5,7 @@ import shutil
import
unittest
import
email.utils
from
contextlib
import
contextmanager
import
pytest
from
scrapy.http
import
Response
,
HtmlResponse
,
Request
from
scrapy.spider
import
Spider
...
...
@@ -136,6 +137,12 @@ class FilesystemStorageTest(DefaultStorageTest):
storage_class
=
'scrapy.contrib.httpcache.FilesystemCacheStorage'
class LeveldbStorageTest(DefaultStorageTest):
    """Exercise the shared storage test suite against the LevelDB backend."""

    # Skip the whole class when the optional leveldb binding is not installed.
    pytest.importorskip('leveldb')

    storage_class = 'scrapy.contrib.httpcache.LeveldbCacheStorage'
class
DummyPolicyTest
(
_BaseTest
):
policy_class
=
'scrapy.contrib.httpcache.DummyPolicy'
...
...
tox.ini
浏览文件 @
752787e6
...
...
@@ -15,6 +15,7 @@ deps =
boto
Pillow
django
leveldb
-rtests-requirements.txt
commands
=
py.test
--twisted
{posargs:scrapy}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录