Unverified commit dfd4ab2b
Authored Mar 03, 2021 by Adrián Chaves; committed by GitHub on Mar 03, 2021

Merge pull request #3 from elacuesta/retry-request-customization

Allow logger/stats customization in get_retry_request

Parents: 1f7665c4 9e623552
Showing 2 changed files with 50 additions and 10 deletions (+50 −10)
scrapy/downloadermiddlewares/retry.py        +12 −4
tests/test_downloadermiddleware_retry.py     +38 −6
scrapy/downloadermiddlewares/retry.py

@@ -29,7 +29,8 @@ from scrapy.utils.response import response_status_message
 from scrapy.core.downloader.handlers.http11 import TunnelError
 from scrapy.utils.python import global_object_name
 
-logger = logging.getLogger(__name__)
+retry_logger = logging.getLogger(__name__)
 
 
 def get_retry_request(
@@ -39,6 +40,8 @@ def get_retry_request(
     reason='unspecified',
     max_retry_times=None,
     priority_adjust=None,
+    logger=retry_logger,
+    stats_base_key='retry',
 ):
     """
     Returns a new :class:`~scrapy.Request` object to retry the specified
@@ -76,6 +79,11 @@ def get_retry_request(
     *priority_adjust* is a number that determines how the priority of the new
     request changes in relation to *request*. If not specified, the number is
     read from the :setting:`RETRY_PRIORITY_ADJUST` setting.
+
+    *logger* is the logging.Logger object to be used when logging messages
+
+    *stats_base_key* is a string to be used as the base key for the
+    retry-related job stats
     """
     settings = spider.crawler.settings
     stats = spider.crawler.stats
@@ -102,11 +110,11 @@ def get_retry_request(
         if isinstance(reason, Exception):
             reason = global_object_name(reason.__class__)
-        stats.inc_value('retry/count')
-        stats.inc_value(f'retry/reason_count/{reason}')
+        stats.inc_value(f'{stats_base_key}/count')
+        stats.inc_value(f'{stats_base_key}/reason_count/{reason}')
         return new_request
     else:
-        stats.inc_value('retry/max_reached')
+        stats.inc_value(f'{stats_base_key}/max_reached')
         logger.error(
             "Gave up retrying %(request)s (failed %(retry_times)d times): "
             "%(reason)s",
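Taken together, the two new keyword arguments let calling code route retry log messages to its own logger and record retry counters under a custom stats namespace, while the defaults (retry_logger and 'retry') preserve the previous behavior for existing callers. A minimal sketch of how a spider callback might use them; the spider class, URL, logger name "my-retries", stats prefix "my_retry", and the 503 condition are all illustrative, not part of this commit:

    import logging

    from scrapy import Spider
    from scrapy.downloadermiddlewares.retry import get_retry_request

    # Hypothetical logger name; any logging.Logger instance works here.
    custom_logger = logging.getLogger("my-retries")


    class ExampleSpider(Spider):  # illustrative spider, not from the commit
        name = "example"
        start_urls = ["https://example.com"]

        def parse(self, response):
            if response.status == 503:
                # Log through custom_logger and count retries under
                # 'my_retry/*' instead of the default 'retry/*' keys.
                new_request = get_retry_request(
                    response.request,
                    spider=self,
                    reason="service unavailable",
                    logger=custom_logger,
                    stats_base_key="my_retry",
                )
                if new_request is not None:  # None once retries are exhausted
                    yield new_request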
tests/test_downloadermiddleware_retry.py

+import logging
 import unittest
 
+from testfixtures import LogCapture
 from twisted.internet import defer
 from twisted.internet.error import (
     ConnectError,
@@ -9,17 +12,12 @@ from twisted.internet.error import (
 )
 from twisted.web.client import ResponseFailed
 
-from scrapy.downloadermiddlewares.retry import (
-    get_retry_request,
-    RetryMiddleware,
-)
+from scrapy.downloadermiddlewares.retry import get_retry_request, RetryMiddleware
 from scrapy.exceptions import IgnoreRequest
 from scrapy.http import Request, Response
 from scrapy.spiders import Spider
 from scrapy.utils.test import get_crawler
-from testfixtures import LogCapture
 
 
 class RetryTest(unittest.TestCase):
     def setUp(self):
@@ -597,6 +595,40 @@ class GetRetryRequestTest(unittest.TestCase):
             )
         )
 
+    def test_custom_logger(self):
+        logger = logging.getLogger("custom-logger")
+        request = Request("https://example.com")
+        spider = self.get_spider()
+        expected_reason = "because"
+        with LogCapture() as log:
+            get_retry_request(
+                request,
+                spider=spider,
+                reason=expected_reason,
+                logger=logger,
+            )
+        log.check_present(
+            (
+                "custom-logger",
+                "DEBUG",
+                f"Retrying {request} (failed 1 times): {expected_reason}",
+            )
+        )
+
+    def test_custom_stats_key(self):
+        request = Request("https://example.com")
+        spider = self.get_spider()
+        expected_reason = "because"
+        stats_key = "custom_retry"
+        get_retry_request(
+            request,
+            spider=spider,
+            reason=expected_reason,
+            stats_base_key=stats_key,
+        )
+        for stat in (
+            f"{stats_key}/count",
+            f"{stats_key}/reason_count/{expected_reason}",
+        ):
+            self.assertEqual(spider.crawler.stats.get_value(stat), 1)
 
 
 if __name__ == "__main__":
     unittest.main()
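The same assertions can be reproduced interactively against a throwaway crawler. This sketch assumes the tests' get_spider() helper (whose body is outside this diff) builds a spider roughly this way, using the private Crawler._create_spider as Scrapy's own test suite does:

    from scrapy.downloadermiddlewares.retry import get_retry_request
    from scrapy.http import Request
    from scrapy.spiders import Spider
    from scrapy.utils.test import get_crawler

    # Build a spider bound to a crawler, mirroring what the tests'
    # get_spider() helper presumably does (assumption: private API).
    crawler = get_crawler(Spider)
    spider = crawler._create_spider("example")

    get_retry_request(Request("https://example.com"), spider=spider, reason="because")

    # With the defaults, counters still land under the 'retry' namespace,
    # so pre-existing callers see no behavior change.
    assert crawler.stats.get_value("retry/count") == 1
    assert crawler.stats.get_value("retry/reason_count/because") == 1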