From f766dd0ba8afce0ec9890106fe0fd477aaf04d29 Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Wed, 17 Feb 2016 23:07:03 +0500 Subject: [PATCH] Preserve tracebacks better. Fixes GH-1760. --- scrapy/crawler.py | 15 +++++++++++++-- tests/test_commands.py | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/scrapy/crawler.py b/scrapy/crawler.py index ef99c243a..3e695f793 100644 --- a/scrapy/crawler.py +++ b/scrapy/crawler.py @@ -3,6 +3,7 @@ import signal import logging import warnings +import sys from twisted.internet import reactor, defer from zope.interface.verify import verifyClass, DoesNotImplement @@ -73,11 +74,21 @@ class Crawler(object): yield self.engine.open_spider(self.spider, start_requests) yield defer.maybeDeferred(self.engine.start) except Exception: - exc = defer.fail() + # In Python 2 reraising an exception after yield discards + # the original traceback (see http://bugs.python.org/issue7563), + # so sys.exc_info() workaround is used. + # This workaround also works in Python 3, but it is not needed, + # and it is slower, so in Python 3 we use native `raise`. + if six.PY2: + exc_info = sys.exc_info() + self.crawling = False if self.engine is not None: yield self.engine.close() - yield exc + + if six.PY2: + raise six.reraise(*exc_info) + raise def _create_spider(self, *args, **kwargs): return self.spidercls.from_crawler(self, *args, **kwargs) diff --git a/tests/test_commands.py b/tests/test_commands.py index 5d45cd62d..93b53dbeb 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -204,6 +204,20 @@ class MySpider(scrapy.Spider): log = to_native_str(p.stderr.read()) self.assertIn("Unable to load", log) + def test_start_requests_errors(self): + p = self.runspider(""" +import scrapy + +class BadSpider(scrapy.Spider): + name = "bad" + def start_requests(self): + raise Exception("oops!") + """, name="badspider.py") + log = to_native_str(p.stderr.read()) + print(log) + self.assertIn("start_requests", log) + self.assertIn("badspider.py", log) + class ParseCommandTest(ProcessTest, SiteTest, CommandTest): command = 'parse' -- GitLab