提交 752787e6 编写于 作者: D Daniel Graña

Add a LevelDB cache backend

上级 1c9effd7
......@@ -285,6 +285,69 @@ class FilesystemCacheStorage(object):
return pickle.load(f)
class LeveldbCacheStorage(object):
    """HTTP cache storage backend that keeps responses in LevelDB.

    One database per spider is opened under the directory configured by the
    ``HTTPCACHE_DIR`` setting, named ``<spider.name>.leveldb``. For every
    cached request two keys are written: ``<key>_data`` holds the pickled
    response fields and ``<key>_time`` holds the time it was stored.
    """

    def __init__(self, settings):
        """Read the cache settings; the database is opened in open_spider.

        ``leveldb`` is imported here instead of at module level, so the
        package is only required when this backend is instantiated.
        """
        import leveldb
        self._leveldb = leveldb
        self.cachedir = data_path(settings['HTTPCACHE_DIR'], createdir=True)
        self.expiration_secs = settings.getint('HTTPCACHE_EXPIRATION_SECS')
        self.db = None

    def open_spider(self, spider):
        """Open the LevelDB database that belongs to ``spider``."""
        dbpath = os.path.join(self.cachedir, '%s.leveldb' % spider.name)
        self.db = self._leveldb.LevelDB(dbpath)

    def close_spider(self, spider):
        """Drop the reference to the spider's database.

        The attribute is reset to ``None`` (its initial value in __init__)
        rather than deleted, so ``self.db`` always exists and closing twice
        does not raise ``AttributeError``.
        """
        # NOTE(review): presumably the LevelDB handle is released once the
        # last reference goes away -- confirm against the leveldb bindings.
        self.db = None

    def retrieve_response(self, spider, request):
        """Return the cached response for ``request``.

        Returns ``None`` when there is no usable cache entry.
        """
        data = self._read_data(spider, request)
        if data is None:
            return None  # not cached
        url = data['url']
        status = data['status']
        headers = Headers(data['headers'])
        body = data['body']
        # Pick the response class from the stored headers and URL.
        respcls = responsetypes.from_args(headers=headers, url=url)
        return respcls(url=url, headers=headers, status=status, body=body)

    def store_response(self, spider, request, response):
        """Store ``response`` in the cache under the key of ``request``."""
        key = self._request_key(request)
        data = {
            'status': response.status,
            'url': response.url,
            'headers': dict(response.headers),
            'body': response.body,
        }
        # Both keys go into one batch so they are submitted in a single
        # write call.
        batch = self._leveldb.WriteBatch()
        batch.Put('%s_data' % key, pickle.dumps(data, protocol=2))
        batch.Put('%s_time' % key, str(time()))
        self.db.Write(batch)

    def _read_data(self, spider, request):
        """Return the stored dict for ``request``.

        Returns ``None`` when the entry is missing, expired or invalid.
        """
        key = self._request_key(request)
        try:
            ts = self.db.Get('%s_time' % key)
        except KeyError:
            return None  # not found or invalid entry

        # The timestamp is only inspected when expiration is enabled.
        if self.expiration_secs > 0:
            try:
                age = time() - float(ts)
            except ValueError:
                return None  # invalid entry: unparsable timestamp
            if age > self.expiration_secs:
                return None  # expired

        try:
            data = self.db.Get('%s_data' % key)
        except KeyError:
            return None  # invalid entry
        # SECURITY NOTE: unpickling is only safe while the cache directory
        # is trusted; a tampered database could execute arbitrary code.
        return pickle.loads(data)

    def _request_key(self, request):
        """Return the cache key for ``request`` (its request fingerprint)."""
        return request_fingerprint(request)
def parse_cachecontrol(header):
"""Parse Cache-Control header
......
......@@ -5,6 +5,7 @@ import shutil
import unittest
import email.utils
from contextlib import contextmanager
import pytest
from scrapy.http import Response, HtmlResponse, Request
from scrapy.spider import Spider
......@@ -136,6 +137,12 @@ class FilesystemStorageTest(DefaultStorageTest):
storage_class = 'scrapy.contrib.httpcache.FilesystemCacheStorage'
class LeveldbStorageTest(DefaultStorageTest):
    """Storage test case configured to use the LevelDB cache backend."""

    storage_class = 'scrapy.contrib.httpcache.LeveldbCacheStorage'

    # This call sits in the class body, so it runs when the class is defined
    # (i.e. while this module is being imported), not per test.
    # NOTE(review): if leveldb is missing this presumably skips the whole
    # module rather than only this class -- confirm against pytest docs.
    pytest.importorskip('leveldb')
class DummyPolicyTest(_BaseTest):
policy_class = 'scrapy.contrib.httpcache.DummyPolicy'
......
......@@ -15,6 +15,7 @@ deps =
boto
Pillow
django
leveldb
-rtests-requirements.txt
commands =
py.test --twisted {posargs:scrapy}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册