提交 59cf9d9b 编写于 作者: M Martin Olveyra

Allow setting a minimum download delay for the AutoThrottle extension. Also

limit the download delay to a minimum of spider.download_delay, if given.
上级 fc52d8d5
......@@ -69,12 +69,15 @@ class AutoThrottle(object):
self.CONCURRENCY_CHECK_PERIOD = settings.getint("AUTOTHROTTLE_CONCURRENCY_CHECK_PERIOD", 10)
self.MAX_CONCURRENCY = settings.getint("AUTOTHROTTLE_MAX_CONCURRENCY", 8)
self.DEBUG = settings.getint("AUTOTHROTTLE_DEBUG", False)
self.MIN_DOWNLOAD_DELAY = settings.getint("AUTOTHROTTLE_MIN_DOWNLOAD_DELAY")
# Alternate constructor: builds the instance by passing the given crawler
# straight to the class constructor and returns the result.
@classmethod
def from_crawler(cls, crawler):
return cls(crawler)
def spider_opened(self, spider):
if hasattr(spider, "download_delay"):
self.MIN_DOWNLOAD_DELAY = spider.download_delay
spider.download_delay = self.START_DELAY
if hasattr(spider, "max_concurrent_requests"):
self.MAX_CONCURRENCY = spider.max_concurrent_requests
......@@ -124,6 +127,9 @@ class AutoThrottle(object):
# if latency is bigger than old delay, then use latency instead of mean. Works better with problematic sites
new_delay = (slot.delay + latency) / 2.0 if latency < slot.delay else latency
if new_delay < self.MIN_DOWNLOAD_DELAY:
new_delay = self.MIN_DOWNLOAD_DELAY
# Don't adjust the delay if the response status != 200 and the new delay is smaller than the old one,
# as error pages (and redirections) are usually small and so tend to reduce latency, thus provoking positive feedback
# by reducing the delay instead of increasing it.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册