提交 495152bd 编写于 作者: P Pablo Hoffman

Disabled verbose depth stats collection by default; added the DEPTH_STATS_VERBOSE setting to enable it.

上级 accb6ed8
......@@ -323,7 +323,17 @@ DEPTH_STATS
Default: ``True``
Whether to collect depth stats.
Whether to collect maximum depth stats.
.. setting:: DEPTH_STATS_VERBOSE
DEPTH_STATS_VERBOSE
-------------------
Default: ``False``
Whether to collect verbose depth stats. If this is enabled, the number of
requests for each depth is collected in the stats (as ``request_depth_count/<depth>``).
.. setting:: DOWNLOADER_DEBUG
......
......@@ -9,9 +9,10 @@ from scrapy.http import Request
class DepthMiddleware(object):
def __init__(self, maxdepth, stats=None):
def __init__(self, maxdepth, stats=None, verbose_stats=False):
self.maxdepth = maxdepth
self.stats = stats
self.verbose_stats = verbose_stats
if self.stats and self.maxdepth:
stats.set_value('envinfo/request_depth_limit', maxdepth)
......@@ -19,11 +20,12 @@ class DepthMiddleware(object):
def from_settings(cls, settings):
maxdepth = settings.getint('DEPTH_LIMIT')
usestats = settings.getbool('DEPTH_STATS')
verbose = settings.getbool('DEPTH_STATS_VERBOSE')
if usestats:
from scrapy.stats import stats
else:
stats = None
return cls(maxdepth, stats)
return cls(maxdepth, stats, verbose)
def process_spider_output(self, response, result, spider):
def _filter(request):
......@@ -35,14 +37,15 @@ class DepthMiddleware(object):
level=log.DEBUG, spider=spider)
return False
elif self.stats:
self.stats.inc_value('request_depth_count/%s' % depth, spider=spider)
if depth > self.stats.get_value('request_depth_max', 0, spider=spider):
self.stats.set_value('request_depth_max', depth, spider=spider)
if self.verbose_stats:
self.stats.inc_value('request_depth_count/%s' % depth, spider=spider)
self.stats.max_value('request_depth_max', depth, spider=spider)
return True
# base case (depth=0)
if self.stats and 'depth' not in response.request.meta:
response.request.meta['depth'] = 0
self.stats.inc_value('request_depth_count/0', spider=spider)
if self.verbose_stats:
self.stats.inc_value('request_depth_count/0', spider=spider)
return (r for r in result or () if _filter(r))
......@@ -14,7 +14,7 @@ class TestDepthMiddleware(TestCase):
self.stats = StatsCollector()
self.stats.open_spider(self.spider)
self.mw = DepthMiddleware(1, self.stats)
self.mw = DepthMiddleware(1, self.stats, True)
self.assertEquals(self.stats.get_value('envinfo/request_depth_limit'), 1)
def test_process_spider_output(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册