Commit 900a4876 authored by Julia Medina

Support multiple simultaneous LogObservers listening to different crawlers

Parent 89df18bd
@@ -10,7 +10,11 @@ logging`_ but this may change in the future.

 .. _Twisted logging: http://twistedmatrix.com/projects/core/documentation/howto/logging.html

-The logging service must be explicitly started through the :func:`scrapy.log.start` function.
+The logging service must be explicitly started through the
+:func:`scrapy.log.start` function to catch top level Scrapy log messages.
+On top of that, each crawler has its own independent log observer
+(automatically attached when it's created) that intercepts its spider's log
+messages.

 .. _topics-logging-levels:
@@ -55,8 +59,11 @@ scrapy.log module

 .. function:: start(logfile=None, loglevel=None, logstdout=None)

-    Start the logging facility. This must be called before actually logging any
-    messages. Otherwise, messages logged before this call will get lost.
+    Start the top level Scrapy logger. This must be called before actually
+    logging any top level messages (those logged using this module's
+    :func:`~scrapy.log.msg` function instead of the :meth:`Spider.log
+    <scrapy.spider.Spider.log>` method). Otherwise, messages logged before this
+    call will get lost.

     :param logfile: the file path to use for logging output. If omitted, the
        :setting:`LOG_FILE` setting will be used. If both are ``None``, the log
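For context, a minimal sketch of how the top-level logger described in this doc change is started under the legacy scrapy.log API; the log file name and level here are illustrative, not part of the commit:

```python
from scrapy import log

# Start the top-level Scrapy logger. It catches messages sent through
# log.msg(); spider messages are handled by the per-crawler observers.
log.start(logfile='scrapy.log', loglevel=log.INFO, logstdout=False)

log.msg("This is a top level message")  # goes to scrapy.log
```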
@@ -128,6 +128,7 @@ class CrawlerProcess(CrawlerRunner):
         signame = signal_names[signum]
         log.msg(format='Received %(signame)s twice, forcing unclean shutdown',
                 level=log.INFO, signame=signame)
+        self._stop_logging()
         reactor.callFromThread(self._stop_reactor)

     def start(self, stop_after_crawl=True):
@@ -135,6 +136,7 @@ class CrawlerProcess(CrawlerRunner):
         self._start_reactor(stop_after_crawl)

     def _start_logging(self):
         self.log_observer = log.start_from_settings(self.settings)
+        log.scrapy_info(self.settings)

     def _start_reactor(self, stop_after_crawl=True):
@@ -149,6 +151,9 @@ class CrawlerProcess(CrawlerRunner):
         reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
         reactor.run(installSignalHandlers=False)  # blocking call

+    def _stop_logging(self):
+        self.log_observer.stop()
+
     def _stop_reactor(self, _=None):
         try:
             reactor.stop()
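Read together, the three hunks above give CrawlerProcess a symmetric observer lifecycle: _start_logging() attaches the process-wide observer and emits the startup info, and _stop_logging() detaches it on forced shutdown. A rough usage sketch under this API; MySpider and its URL are hypothetical:

```python
from scrapy.spider import Spider
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

class MySpider(Spider):
    # Hypothetical spider, just to have something to crawl.
    name = 'myspider'
    start_urls = ['http://example.com']

process = CrawlerProcess(get_project_settings())
process.crawl(MySpider)  # the new crawler gets its own log observer
process.start()          # _start_logging(), reactor loop, then shutdown
```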
@@ -35,15 +35,16 @@ class ScrapyFileLogObserver(log.FileLogObserver):
     def __init__(self, f, level=INFO, encoding='utf-8', crawler=None):
         self.level = level
         self.encoding = encoding
+        self.crawler = crawler
         if crawler:
-            self.crawler = crawler
             self.emit = self._emit_with_crawler
         else:
             self.emit = self._emit
         log.FileLogObserver.__init__(self, f)

     def _emit(self, eventDict):
-        ev = _adapt_eventdict(eventDict, self.level, self.encoding)
+        ev = _adapt_eventdict(eventDict, self.level, self.encoding,
+                              self.crawler)
         if ev is not None:
             log.FileLogObserver.emit(self, ev)
         return ev
@@ -55,7 +56,8 @@ class ScrapyFileLogObserver(log.FileLogObserver):
         sname = 'log_count/%s' % level_names.get(level, level)
         self.crawler.stats.inc_value(sname)

-def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8', prepend_level=True):
+def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8',
+                     crawler=None, prepend_level=True):
     """Adapt Twisted log eventDict making it suitable for logging with a Scrapy
     log observer. It may return None to indicate that the event should be
     ignored by a Scrapy log observer.
@@ -78,6 +80,12 @@ def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8', prepend_level=
     spider = ev.get('spider')
     if spider:
         ev['system'] = unicode_to_str(spider.name, encoding)
+    if crawler and (not spider or spider.crawler is not crawler):
+        # ignore events not triggered by own spiders in crawlers' observers
+        return
+    if not crawler and spider:
+        # ignore spiders' events in observers without crawler
+        return

     lvlname = level_names.get(level, 'NOLEVEL')
     message = ev.get('message')
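The two early returns added above encode the dispatch rule that makes simultaneous observers possible. A standalone restatement of that rule; observer_accepts is a hypothetical helper, not part of the commit:

```python
def observer_accepts(spider, crawler):
    """Restate the dispatch rule from _adapt_eventdict:

    - an observer bound to a crawler accepts only events coming from
      that crawler's own spider;
    - an observer without a crawler (the top-level one) accepts only
      events that carry no spider at all.
    """
    if crawler is not None:
        return spider is not None and spider.crawler is crawler
    return spider is None
```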
@@ -140,18 +148,14 @@ def start_from_settings(settings, crawler=None):
                      settings['LOG_ENCODING'], crawler)

 def scrapy_info(settings):
-    log_observer = start_from_settings(settings)
-    if log_observer:
-        msg("Scrapy %s started (bot: %s)" % (scrapy.__version__,
-            settings['BOT_NAME']))
-        msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
-            level=INFO)
-        d = dict(overridden_settings(settings))
-        msg(format="Overridden settings: %(settings)r", settings=d, level=INFO)
-        log_observer.stop()
+    msg("Scrapy %s started (bot: %s)" % (scrapy.__version__,
+        settings['BOT_NAME']))
+
+    msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
+        level=INFO)
+
+    d = dict(overridden_settings(settings))
+    msg(format="Overridden settings: %(settings)r", settings=d, level=INFO)

 def start_from_crawler(crawler):
     return start_from_settings(crawler.settings, crawler)
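After this refactor scrapy_info() only emits the startup messages; owning the observer is now the caller's job, as CrawlerProcess does in _start_logging()/_stop_logging(). A sketch of the resulting call order, assuming a plain Settings object:

```python
from scrapy import log
from scrapy.settings import Settings

settings = Settings()
observer = log.start_from_settings(settings)  # may be falsy if logging is disabled
log.scrapy_info(settings)                     # banner, features, overridden settings
# ... run the process ...
if observer:
    observer.stop()                           # mirrors CrawlerProcess._stop_logging()
```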
@@ -6,6 +6,7 @@ from twisted.trial import unittest
 from scrapy import log
 from scrapy.spider import Spider
 from scrapy.settings import default_settings
+from scrapy.utils.test import get_crawler


 class LogTest(unittest.TestCase):
@@ -40,10 +41,10 @@ class ScrapyFileLogObserverTest(unittest.TestCase):
         log.msg("Hello")
         self.assertEqual(self.logged(), "[scrapy] INFO: Hello")

-    def test_msg_spider(self):
+    def test_msg_ignore_spider(self):
         spider = Spider("myspider")
         log.msg("Hello", spider=spider)
-        self.assertEqual(self.logged(), "[myspider] INFO: Hello")
+        self.failIf(self.logged())

     def test_msg_level1(self):
         log.msg("Hello", level=log.WARNING)
@@ -57,11 +58,6 @@ class ScrapyFileLogObserverTest(unittest.TestCase):
         log.msg("Hello", level=9999)
         self.assertEqual(self.logged(), "[scrapy] NOLEVEL: Hello")

-    def test_msg_level_spider(self):
-        spider = Spider("myspider")
-        log.msg("Hello", spider=spider, level=log.WARNING)
-        self.assertEqual(self.logged(), "[myspider] WARNING: Hello")
-
     def test_msg_encoding(self):
         log.msg(u"Price: \xa3100")
         self.assertEqual(self.logged(), "[scrapy] INFO: Price: \xc2\xa3100")
@@ -133,5 +129,41 @@ class Latin1ScrapyFileLogObserverTest(ScrapyFileLogObserverTest):
 #        self.assertEqual(self.first_log_line(), "[scrapy] ERROR: \xa3")


+class CrawlerScrapyFileLogObserverTest(unittest.TestCase):
+
+    def setUp(self):
+        self.f = BytesIO()
+        self.crawler = get_crawler(Spider)
+        self.spider = self.crawler.spider = self.crawler._create_spider('test')
+        self.log_observer = log.ScrapyFileLogObserver(self.f, log.INFO,
+                                                      'utf-8', self.crawler)
+        self.log_observer.start()
+
+    def tearDown(self):
+        self.flushLoggedErrors()
+        self.log_observer.stop()
+
+    def logged(self):
+        return self.f.getvalue().strip()[25:]
+
+    def test_msg_basic(self):
+        log.msg("Hello", spider=self.spider)
+        self.assertEqual(self.logged(), "[test] INFO: Hello")
+
+    def test_msg_ignore_scrapy_channel(self):
+        log.msg("Hello")
+        self.failIf(self.logged())
+
+    def test_msg_ignore_another_crawler(self):
+        crawler = get_crawler(Spider)
+        log.msg("Hello", spider=crawler._create_spider('test'))
+        self.failIf(self.logged())
+
+    def test_msg_stats_log(self):
+        assert self.crawler.stats.get_value('log_count/INFO', 0) == 0
+        log.msg("Hello", spider=self.spider)
+        self.assertEqual(self.crawler.stats.get_value('log_count/INFO'), 1)
+
+
 if __name__ == "__main__":
     unittest.main()
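End to end, the new tests imply the behavior the commit title promises: observers attached to different crawlers can listen at the same time without cross-talk. A sketch built from the same internal test helpers used above (get_crawler and _create_spider are test/private utilities):

```python
from io import BytesIO

from scrapy import log
from scrapy.spider import Spider
from scrapy.utils.test import get_crawler

f1, f2 = BytesIO(), BytesIO()
crawler1, crawler2 = get_crawler(Spider), get_crawler(Spider)
spider1 = crawler1.spider = crawler1._create_spider('one')
spider2 = crawler2.spider = crawler2._create_spider('two')

# Two simultaneous observers, each filtering on its own crawler.
obs1 = log.ScrapyFileLogObserver(f1, log.INFO, 'utf-8', crawler1)
obs2 = log.ScrapyFileLogObserver(f2, log.INFO, 'utf-8', crawler2)
obs1.start()
obs2.start()

log.msg("hello from one", spider=spider1)  # lands only in f1
log.msg("hello from two", spider=spider2)  # lands only in f2

obs1.stop()
obs2.stop()
```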