Commit 766f2d91 authored by Pablo Hoffman

Renamed Request Handlers to Download Handlers

Parent 067ec65d
......@@ -416,6 +416,34 @@ Another way to change the download delay (per spider, instead of globally) is
by using the ``download_delay`` spider attribute, which takes precedence
over this setting.
.. setting:: DOWNLOAD_HANDLERS
DOWNLOAD_HANDLERS
-----------------
Default: ``{}``
A dict containing the request download handlers enabled in your project.
See :setting:`DOWNLOAD_HANDLERS_BASE` for an example of the format.
.. setting:: DOWNLOAD_HANDLERS_BASE
DOWNLOAD_HANDLERS_BASE
----------------------
Default::
{
'file': 'scrapy.core.downloader.handlers.file.FileDownloadHandler',
'http': 'scrapy.core.downloader.handlers.http.HttpDownloadHandler',
'https': 'scrapy.core.downloader.handlers.http.HttpDownloadHandler',
's3': 'scrapy.core.downloader.handlers.s3.S3DownloadHandler',
}
A dict containing the request download handlers enabled by default in Scrapy.
You should never modify this setting in your project; modify
:setting:`DOWNLOAD_HANDLERS` instead.
.. setting:: DOWNLOAD_TIMEOUT
DOWNLOAD_TIMEOUT
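The :setting:`DOWNLOAD_HANDLERS` dict documented above is keyed by URI
scheme, so a project can route one scheme to its own handler class while
keeping the defaults from :setting:`DOWNLOAD_HANDLERS_BASE`. A minimal
sketch of a project ``settings.py``, using a hypothetical
``myproject.handlers.FtpDownloadHandler`` path::

    # settings.py -- route ftp:// requests to a custom handler
    # (the module path below is hypothetical, for illustration only)
    DOWNLOAD_HANDLERS = {
        'ftp': 'myproject.handlers.FtpDownloadHandler',
    }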
......@@ -756,34 +784,6 @@ Default: ``+2``
Adjust redirect request priority relative to the original request.
A negative priority adjustment means higher priority.
.. setting:: REQUEST_HANDLERS
REQUEST_HANDLERS
----------------
Default: ``{}``
A dict containing the request download handlers enabled in your project.
See :setting:`REQUEST_HANDLERS_BASE` for an example of the format.
.. setting:: REQUEST_HANDLERS_BASE
REQUEST_HANDLERS_BASE
---------------------
Default::
{
'file': 'scrapy.core.downloader.handlers.file.download_file',
'http': 'scrapy.core.downloader.handlers.http.download_http',
'https': 'scrapy.core.downloader.handlers.http.download_http',
's3': 'scrapy.core.downloader.handlers.s3.S3RequestHandler',
}
A dict containing the request download handlers enabled by default in Scrapy.
You should never modify this setting in your project; modify
:setting:`REQUEST_HANDLERS` instead.
.. setting:: REQUESTS_QUEUE_SIZE
REQUESTS_QUEUE_SIZE
......
......@@ -42,6 +42,15 @@ DEPTH_LIMIT = 0
DEPTH_STATS = True
DOWNLOAD_DELAY = 0
DOWNLOAD_HANDLERS = {}
DOWNLOAD_HANDLERS_BASE = {
'file': 'scrapy.core.downloader.handlers.file.FileDownloadHandler',
'http': 'scrapy.core.downloader.handlers.http.HttpDownloadHandler',
'https': 'scrapy.core.downloader.handlers.http.HttpDownloadHandler',
's3': 'scrapy.core.downloader.handlers.s3.S3DownloadHandler',
}
DOWNLOAD_TIMEOUT = 180 # 3mins
DOWNLOADER_DEBUG = False
......@@ -183,14 +192,6 @@ REDIRECT_MAX_METAREFRESH_DELAY = 100
REDIRECT_MAX_TIMES = 20 # uses Firefox default setting
REDIRECT_PRIORITY_ADJUST = +2
REQUEST_HANDLERS = {}
REQUEST_HANDLERS_BASE = {
'file': 'scrapy.core.downloader.handlers.file.FileRequestHandler',
'http': 'scrapy.core.downloader.handlers.http.HttpRequestHandler',
'https': 'scrapy.core.downloader.handlers.http.HttpRequestHandler',
's3': 'scrapy.core.downloader.handlers.s3.S3RequestHandler',
}
REQUESTS_QUEUE_SIZE = 0
# contrib.middleware.retry.RetryMiddleware default settings
......
......@@ -6,13 +6,13 @@ from scrapy.conf import settings
from scrapy.utils.misc import load_object
class RequestHandlers(object):
class DownloadHandlers(object):
def __init__(self):
self._handlers = {}
self._notconfigured = {}
handlers = settings.get('REQUEST_HANDLERS_BASE')
handlers.update(settings.get('REQUEST_HANDLERS', {}))
handlers = settings.get('DOWNLOAD_HANDLERS_BASE')
handlers.update(settings.get('DOWNLOAD_HANDLERS', {}))
for scheme, clspath in handlers.iteritems():
cls = load_object(clspath)
try:
......
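The constructor above merges the two settings with a plain ``dict.update``,
so a project entry for a scheme replaces the base entry for the same scheme
before any handler class is loaded. A standalone sketch of that merge,
using only dict semantics (the project-side handler path is hypothetical)::

    # How DOWNLOAD_HANDLERS overrides DOWNLOAD_HANDLERS_BASE, per scheme.
    base = {
        'http': 'scrapy.core.downloader.handlers.http.HttpDownloadHandler',
        'file': 'scrapy.core.downloader.handlers.file.FileDownloadHandler',
    }
    project = {'http': 'myproject.handlers.CustomHttpHandler'}  # hypothetical

    handlers = dict(base)     # copy the defaults
    handlers.update(project)  # project entries win for matching schemes

    assert handlers['http'] == 'myproject.handlers.CustomHttpHandler'
    assert handlers['file'].endswith('FileDownloadHandler')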
......@@ -2,7 +2,7 @@ from scrapy.core.downloader.responsetypes import responsetypes
from scrapy.utils.url import file_uri_to_path
from scrapy.utils.decorator import defers
class FileRequestHandler(object):
class FileDownloadHandler(object):
@defers
def download_request(self, request, spider):
......
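Each renamed handler exposes the same entry point: a
``download_request(request, spider)`` method that returns a Twisted
``Deferred`` firing with the response (``FileDownloadHandler`` gets this
via the ``@defers`` decorator). A minimal sketch of a class satisfying
that interface, with a hypothetical name and an already-fired ``Deferred``
standing in for real I/O::

    from twisted.internet import defer

    class DummyDownloadHandler(object):
        """Hypothetical handler satisfying the download-handler interface."""

        def download_request(self, request, spider):
            # A real handler would perform network or file I/O here and
            # fire the Deferred with a scrapy Response object; a plain
            # string stands in for one in this sketch.
            return defer.succeed('canned body for %s' % request)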
......@@ -17,7 +17,7 @@ HTTPClientFactory = load_object(settings['DOWNLOADER_HTTPCLIENTFACTORY'])
DOWNLOAD_TIMEOUT = settings.getint('DOWNLOAD_TIMEOUT')
class HttpRequestHandler(object):
class HttpDownloadHandler(object):
def __init__(self, httpclientfactory=HTTPClientFactory, \
download_timeout=DOWNLOAD_TIMEOUT):
......
......@@ -2,13 +2,13 @@ from scrapy import optional_features
from scrapy.exceptions import NotConfigured
from scrapy.utils.httpobj import urlparse_cached
from scrapy.conf import settings
from .http import HttpRequestHandler
from .http import HttpDownloadHandler
class S3RequestHandler(object):
class S3DownloadHandler(object):
def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, \
httprequesthandler=HttpRequestHandler):
httpdownloadhandler=HttpDownloadHandler):
if 'boto' not in optional_features:
raise NotConfigured("missing boto library")
......@@ -22,7 +22,7 @@ class S3RequestHandler(object):
self.conn = connect_s3(aws_access_key_id, aws_secret_access_key)
except Exception, ex:
raise NotConfigured(str(ex))
self._download_http = httprequesthandler().download_request
self._download_http = httpdownloadhandler().download_request
def download_request(self, request, spider):
p = urlparse_cached(request)
......
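``S3DownloadHandler`` owns no transport of its own: it authenticates the
request and then delegates to the injected HTTP handler, which is exactly
what lets the tests below swap in a mock. A self-contained sketch of that
wrap-and-delegate pattern, with all names hypothetical and a stub signature
standing in for the boto call::

    # Wrap-and-delegate, as in S3DownloadHandler (names hypothetical).
    class FakeRequest(object):
        def __init__(self, url):
            self.url = url
            self.headers = {}

    class HttpHandlerMock(object):
        def download_request(self, request, spider):
            return request  # echo the request back, like the test mock below

    class SigningHandler(object):
        def __init__(self, httpdownloadhandler=HttpHandlerMock):
            # keep only the wrapped handler's entry point, as the real class does
            self._download_http = httpdownloadhandler().download_request

        def download_request(self, request, spider):
            request.headers['Authorization'] = 'stub-signature'  # boto signs here
            return self._download_http(request, spider)

    req = SigningHandler().download_request(FakeRequest('s3://bucket/key'), 'spider')
    assert req.headers['Authorization'] == 'stub-signature'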
......@@ -13,7 +13,7 @@ from scrapy.conf import settings
from scrapy.utils.defer import mustbe_deferred
from scrapy import log
from .middleware import DownloaderMiddlewareManager
from .handlers import RequestHandlers
from .handlers import DownloadHandlers
class SpiderInfo(object):
......@@ -71,7 +71,7 @@ class Downloader(object):
def __init__(self):
self.sites = {}
self.handlers = RequestHandlers()
self.handlers = DownloadHandlers()
self.middleware = DownloaderMiddlewareManager()
self.concurrent_spiders = settings.getint('CONCURRENT_SPIDERS')
......
......@@ -10,9 +10,9 @@ from twisted.web.test.test_webclient import ForeverTakingResource, \
PayloadResource, BrokenDownloadResource
from scrapy.core.downloader.webclient import PartialDownloadError
from scrapy.core.downloader.handlers.file import FileRequestHandler
from scrapy.core.downloader.handlers.http import HttpRequestHandler
from scrapy.core.downloader.handlers.s3 import S3RequestHandler
from scrapy.core.downloader.handlers.file import FileDownloadHandler
from scrapy.core.downloader.handlers.http import HttpDownloadHandler
from scrapy.core.downloader.handlers.s3 import S3DownloadHandler
from scrapy.spider import BaseSpider
from scrapy.http import Request
from scrapy.utils.url import path_to_file_uri
......@@ -26,7 +26,7 @@ class FileTestCase(unittest.TestCase):
fd = open(self.tmpname + '^', 'w')
fd.write('0123456789')
fd.close()
self.download_request = FileRequestHandler().download_request
self.download_request = FileDownloadHandler().download_request
def test_download(self):
def _test(response):
......@@ -61,7 +61,7 @@ class HttpTestCase(unittest.TestCase):
self.wrapper = WrappingFactory(self.site)
self.port = reactor.listenTCP(0, self.wrapper, interface='127.0.0.1')
self.portno = self.port.getHost().port
self.download_request = HttpRequestHandler().download_request
self.download_request = HttpDownloadHandler().download_request
def tearDown(self):
return self.port.stopListening()
......@@ -156,7 +156,7 @@ class HttpProxyTestCase(unittest.TestCase):
wrapper = WrappingFactory(site)
self.port = reactor.listenTCP(0, wrapper, interface='127.0.0.1')
self.portno = self.port.getHost().port
self.download_request = HttpRequestHandler().download_request
self.download_request = HttpDownloadHandler().download_request
def tearDown(self):
return self.port.stopListening()
......@@ -184,7 +184,7 @@ class HttpProxyTestCase(unittest.TestCase):
return self.download_request(request, BaseSpider('foo')).addCallback(_test)
class HttpRequestHandlerMock(object):
class HttpDownloadHandlerMock(object):
def download_request(self, request, spider):
return request
......@@ -199,9 +199,9 @@ class S3TestCase(unittest.TestCase):
AWS_SECRET_ACCESS_KEY = 'uV3F3YluFJax1cknvbcGwgjvx4QpvB+leU8dUj2o'
def setUp(self):
s3reqh = S3RequestHandler(self.AWS_ACCESS_KEY_ID, \
s3reqh = S3DownloadHandler(self.AWS_ACCESS_KEY_ID, \
self.AWS_SECRET_ACCESS_KEY, \
httprequesthandler=HttpRequestHandlerMock)
httpdownloadhandler=HttpDownloadHandlerMock)
self.download_request = s3reqh.download_request
self.spider = BaseSpider('foo')
......