Unverified commit dfd4ab2b, authored by Adrián Chaves, committed by GitHub

Merge pull request #3 from elacuesta/retry-request-customization

Allow logger/stats customization in get_retry_request
@@ -29,7 +29,8 @@ from scrapy.utils.response import response_status_message
 from scrapy.core.downloader.handlers.http11 import TunnelError
 from scrapy.utils.python import global_object_name
 
-logger = logging.getLogger(__name__)
+retry_logger = logging.getLogger(__name__)
 
 
 def get_retry_request(
@@ -39,6 +40,8 @@ def get_retry_request(
     reason='unspecified',
     max_retry_times=None,
     priority_adjust=None,
+    logger=retry_logger,
+    stats_base_key='retry',
 ):
     """
     Returns a new :class:`~scrapy.Request` object to retry the specified
@@ -76,6 +79,11 @@ def get_retry_request(
     *priority_adjust* is a number that determines how the priority of the new
     request changes in relation to *request*. If not specified, the number is
     read from the :setting:`RETRY_PRIORITY_ADJUST` setting.
+
+    *logger* is the logging.Logger object to be used when logging messages
+
+    *stats_base_key* is a string to be used as the base key for the
+    retry-related job stats
     """
     settings = spider.crawler.settings
     stats = spider.crawler.stats
@@ -102,11 +110,11 @@
         if isinstance(reason, Exception):
             reason = global_object_name(reason.__class__)
 
-        stats.inc_value('retry/count')
-        stats.inc_value(f'retry/reason_count/{reason}')
+        stats.inc_value(f'{stats_base_key}/count')
+        stats.inc_value(f'{stats_base_key}/reason_count/{reason}')
         return new_request
     else:
-        stats.inc_value('retry/max_reached')
+        stats.inc_value(f'{stats_base_key}/max_reached')
         logger.error(
             "Gave up retrying %(request)s (failed %(retry_times)d times): "
             "%(reason)s",
......
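Taken together, the two new keyword arguments let a custom middleware route retry log lines to its own logger and namespace its job stats. Below is a minimal usage sketch; the BrokenProxyRetryMiddleware class, its status codes, and the "broken_proxy" names are illustrative assumptions, and only the logger and stats_base_key arguments come from this change:

import logging

from scrapy.downloadermiddlewares.retry import get_retry_request

# Hypothetical logger for the sketch; any logging.Logger works here.
broken_proxy_logger = logging.getLogger("broken_proxy")


class BrokenProxyRetryMiddleware:
    # Hypothetical middleware: retries 502/503 responses under its own
    # logger and stats namespace instead of the default 'retry/*' keys.
    def process_response(self, request, response, spider):
        if response.status in (502, 503):
            new_request = get_retry_request(
                request,
                spider=spider,
                reason=f"HTTP {response.status}",
                logger=broken_proxy_logger,
                stats_base_key="broken_proxy_retry",
            )
            # get_retry_request returns None once max retries are exhausted
            return new_request if new_request is not None else response
        return response

With that base key, the job stats become broken_proxy_retry/count, broken_proxy_retry/reason_count/<reason> and broken_proxy_retry/max_reached, mirroring the templated keys in the diff above.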
+import logging
 import unittest
 
+from testfixtures import LogCapture
 from twisted.internet import defer
 from twisted.internet.error import (
     ConnectError,
@@ -9,17 +12,12 @@ from twisted.internet.error import (
 )
 from twisted.web.client import ResponseFailed
 
-from scrapy.downloadermiddlewares.retry import (
-    get_retry_request,
-    RetryMiddleware,
-)
+from scrapy.downloadermiddlewares.retry import get_retry_request, RetryMiddleware
 from scrapy.exceptions import IgnoreRequest
 from scrapy.http import Request, Response
 from scrapy.spiders import Spider
 from scrapy.utils.test import get_crawler
-from testfixtures import LogCapture
 
 
 class RetryTest(unittest.TestCase):
     def setUp(self):
@@ -597,6 +595,40 @@ class GetRetryRequestTest(unittest.TestCase):
             )
         )
 
+    def test_custom_logger(self):
+        logger = logging.getLogger("custom-logger")
+        request = Request("https://example.com")
+        spider = self.get_spider()
+        expected_reason = "because"
+        with LogCapture() as log:
+            get_retry_request(
+                request,
+                spider=spider,
+                reason=expected_reason,
+                logger=logger,
+            )
+        log.check_present(
+            (
+                "custom-logger",
+                "DEBUG",
+                f"Retrying {request} (failed 1 times): {expected_reason}",
+            )
+        )
+
+    def test_custom_stats_key(self):
+        request = Request("https://example.com")
+        spider = self.get_spider()
+        expected_reason = "because"
+        stats_key = "custom_retry"
+        get_retry_request(
+            request,
+            spider=spider,
+            reason=expected_reason,
+            stats_base_key=stats_key,
+        )
+        for stat in (f"{stats_key}/count", f"{stats_key}/reason_count/{expected_reason}"):
+            self.assertEqual(spider.crawler.stats.get_value(stat), 1)
+
 
 if __name__ == "__main__":
     unittest.main()
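If the sketch above were shipped as a real middleware, wiring it in would look like the following; the myproject.middlewares module path is an assumption, while 550 is the slot the built-in RetryMiddleware occupies by default:

# settings.py (illustrative): run the custom middleware in place of the
# built-in RetryMiddleware, which sits at priority 550 by default
DOWNLOADER_MIDDLEWARES = {
    "scrapy.downloadermiddlewares.retry.RetryMiddleware": None,
    "myproject.middlewares.BrokenProxyRetryMiddleware": 550,
}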