提交 75060d14 编写于 作者: M Mikhail Korobov

Fix for #612 + integration-style tests for HttpErrorMiddleware

上级 364790eb
......@@ -25,7 +25,7 @@ class HttpErrorMiddleware(object):
self.handle_httpstatus_list = settings.getlist('HTTPERROR_ALLOWED_CODES')
def process_spider_input(self, response, spider):
if 200 <= response.status < 300: # common case
if 200 <= response.status < 300: # common case
return
meta = response.meta
if 'handle_httpstatus_all' in meta:
......@@ -38,11 +38,14 @@ class HttpErrorMiddleware(object):
allowed_statuses = getattr(spider, 'handle_httpstatus_list', self.handle_httpstatus_list)
if response.status in allowed_statuses:
return
log.msg(format="Ignoring HTTP response code: not handled or not allowed: %(status_code)d",
level=log.DEBUG, spider=spider,
status_code=response.status)
raise HttpError(response, 'Ignoring non-200 response')
def process_spider_exception(self, response, exception, spider):
    """Log and swallow ``HttpError`` exceptions for a response.

    When ``exception`` is an ``HttpError``, a DEBUG message naming the
    response is logged and an empty list is returned. For any other
    exception nothing is logged and ``None`` is returned.

    NOTE(review): presumably the empty list tells the spider-middleware
    chain that the error was handled and produced no output, while ``None``
    lets other handlers process it -- confirm against the Scrapy docs.
    """
    if not isinstance(exception, HttpError):
        # Not raised by this middleware's status filtering: leave it alone.
        return None

    log.msg(
        format="Ignoring response %(response)r: HTTP status code is not handled or not allowed",
        level=log.DEBUG,
        spider=spider,
        response=response,
    )
    return []
from unittest import TestCase
from twisted.trial.unittest import TestCase as TrialTestCase
from twisted.internet import defer
from scrapy.utils.test import docrawl, get_testlog
from scrapy.tests.mockserver import MockServer
from scrapy.http import Response, Request
from scrapy.spider import Spider
from scrapy.contrib.spidermiddleware.httperror import HttpErrorMiddleware, HttpError
from scrapy.settings import Settings
class _HttpErrorSpider(Spider):
    """Test spider that requests several HTTP status codes and records outcomes.

    Every response is identified by the last three characters of its URL
    (the value of the ``n`` query parameter in ``start_urls``) and ends up in
    one or more of these sets:

    * ``parsed``  -- responses that reached :meth:`parse`;
    * ``skipped`` -- ``HttpError`` responses whose status is listed in
      ``bypass_status_codes`` and were therefore parsed anyway;
    * ``failed``  -- responses attached to any other failure seen by
      :meth:`on_error`.
    """

    name = 'httperror'
    start_urls = [
        "http://localhost:8998/status?n=200",
        "http://localhost:8998/status?n=404",
        "http://localhost:8998/status?n=402",
        "http://localhost:8998/status?n=500",
    ]
    # Status codes whose HttpError is recovered from in on_error(). Empty by
    # default; the tests pass a replacement as a constructor keyword
    # (presumably Spider.__init__ stores keyword arguments as attributes --
    # TODO confirm).
    bypass_status_codes = set()

    def __init__(self, *args, **kwargs):
        """Initialise the base spider and the three per-instance result sets."""
        super(_HttpErrorSpider, self).__init__(*args, **kwargs)
        self.failed = set()
        self.skipped = set()
        self.parsed = set()

    def start_requests(self):
        """Yield one request per start URL, with :meth:`on_error` as errback."""
        for url in self.start_urls:
            yield Request(url, self.parse, errback=self.on_error)

    def parse(self, response):
        """Record the response as parsed, keyed by the URL's last 3 characters."""
        self.parsed.add(response.url[-3:])

    def on_error(self, failure):
        """Errback: bypass selected ``HttpError`` statuses, record the rest.

        Returns the result of :meth:`parse` when the failure is an
        ``HttpError`` whose response status is in ``bypass_status_codes``;
        otherwise returns ``failure`` unchanged so the error keeps
        propagating.
        """
        error = failure.value
        # Not every failure carries a response; look it up defensively so
        # that such a failure is returned as-is instead of being replaced by
        # an AttributeError raised from this errback.
        response = getattr(error, 'response', None)

        if isinstance(error, HttpError) and response.status in self.bypass_status_codes:
            self.skipped.add(response.url[-3:])
            return self.parse(response)

        if response is not None:
            self.failed.add(response.url[-3:])
        return failure
def _responses(request, status_codes):
responses = []
for code in status_codes:
......@@ -48,6 +88,7 @@ class TestHttpErrorMiddleware(TestCase):
self.assertEquals(None,
self.mw.process_spider_input(self.res404, self.spider))
class TestHttpErrorMiddlewareSettings(TestCase):
"""Similar test, but with settings"""
......@@ -85,6 +126,7 @@ class TestHttpErrorMiddlewareSettings(TestCase):
self.assertRaises(HttpError,
self.mw.process_spider_input, self.res402, self.spider)
class TestHttpErrorMiddlewareHandleAll(TestCase):
def setUp(self):
......@@ -112,3 +154,34 @@ class TestHttpErrorMiddlewareHandleAll(TestCase):
self.assertRaises(HttpError,
self.mw.process_spider_input, res402, self.spider)
class TestHttpErrorMiddlewareIntegrational(TrialTestCase):
    """Integration-style tests: crawl with ``_HttpErrorSpider`` and check
    which responses were parsed, skipped or failed, and what was logged.
    """

    def setUp(self):
        # The mock server is entered and exited by hand (rather than with a
        # ``with`` block) so that it spans the whole test method.
        self.mockserver = MockServer()
        self.mockserver.__enter__()

    def tearDown(self):
        self.mockserver.__exit__(None, None, None)

    @defer.inlineCallbacks
    def test_middleware_works(self):
        """Without bypass codes only the 200 response is parsed."""
        spider = _HttpErrorSpider()
        yield docrawl(spider)
        # Use a TestCase assertion instead of a bare ``assert`` so the check
        # still runs under ``python -O`` and reports like its neighbours.
        self.assertEqual(spider.skipped, set())
        self.assertEqual(spider.parsed, {'200'})
        self.assertEqual(spider.failed, {'404', '402', '500'})

    @defer.inlineCallbacks
    def test_logging(self):
        """With 402 bypassed, only the 404 and 500 responses are logged as ignored."""
        spider = _HttpErrorSpider(bypass_status_codes={402})
        yield docrawl(spider)
        self.assertEqual(spider.parsed, {'200', '402'})
        self.assertEqual(spider.skipped, {'402'})
        self.assertEqual(spider.failed, {'404', '500'})

        log = get_testlog()
        self.assertIn('Ignoring response <404', log)
        self.assertIn('Ignoring response <500', log)
        self.assertNotIn('Ignoring response <200', log)
        self.assertNotIn('Ignoring response <402', log)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册