Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
looyolo
scrapy
提交
56e92d90
S
scrapy
项目概览
looyolo
/
scrapy
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
scrapy
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
56e92d90
编写于
7月 17, 2018
作者:
E
Eugenio Lacuesta
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Update tests
上级
e96b7782
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
45 additions
and
9 deletions
+45
-9
tests/test_spidermiddleware.py
tests/test_spidermiddleware.py
+45
-9
未找到文件。
tests/test_spidermiddleware.py
浏览文件 @
56e92d90
...
...
@@ -119,8 +119,10 @@ class GeneratorOutputChainSpider(Spider):
name
=
'GeneratorOutputChainSpider'
custom_settings
=
{
'SPIDER_MIDDLEWARES'
:
{
__name__
+
'.GeneratorFailOutputChainMiddleware'
:
10
,
__name__
+
'.GeneratorRecoverOutputChainMiddleware'
:
5
,
__name__
+
'.GeneratorFailMiddleware'
:
10
,
__name__
+
'.GeneratorDoNothingAfterFailureMiddleware'
:
8
,
__name__
+
'.GeneratorRecoverMiddleware'
:
5
,
__name__
+
'.GeneratorDoNothingAfterRecoveryMiddleware'
:
3
,
},
}
...
...
@@ -128,10 +130,23 @@ class GeneratorOutputChainSpider(Spider):
yield
Request
(
self
.
mockserver
.
url
(
'/status?n=200'
))
def parse(self, response):
    """Spider callback for GeneratorOutputChainSpider.

    Yields two plain items; each item carries a 'processed' list that the
    spider middlewares in the chain append their method names to, so the
    tests can verify the exact order in which middleware hooks ran.

    Reconstructed from a token-per-line diff scrape: the pre-commit
    single ``['parse']`` item (deleted line) is dropped in favor of the
    two post-commit items.
    """
    yield {'processed': ['parse-first-item']}
    yield {'processed': ['parse-second-item']}
class
GeneratorFailOutputChainMiddleware
:
class
_GeneratorDoNothingMiddleware
:
def
process_spider_output
(
self
,
response
,
result
,
spider
):
for
r
in
result
:
r
[
'processed'
].
append
(
'{}.process_spider_output'
.
format
(
self
.
__class__
.
__name__
))
yield
r
def
process_spider_exception
(
self
,
response
,
exception
,
spider
):
method
=
'{}.process_spider_exception'
.
format
(
self
.
__class__
.
__name__
)
logging
.
info
(
'%s: %s caught'
,
method
,
exception
.
__class__
.
__name__
)
return
None
class
GeneratorFailMiddleware
:
def
process_spider_output
(
self
,
response
,
result
,
spider
):
for
r
in
result
:
r
[
'processed'
].
append
(
'{}.process_spider_output'
.
format
(
self
.
__class__
.
__name__
))
...
...
@@ -144,7 +159,11 @@ class GeneratorFailOutputChainMiddleware:
yield
{
'processed'
:
[
method
]}
class GeneratorDoNothingAfterFailureMiddleware(_GeneratorDoNothingMiddleware):
    """Sits after the failing middleware in the chain; tags output and
    logs (without recovering) any exception — all behavior inherited
    from _GeneratorDoNothingMiddleware.

    Reconstructed from a token-per-line diff scrape; the deleted
    counterpart line (old ``GeneratorRecoverOutputChainMiddleware``
    header) is dropped.
    """
    pass
class
GeneratorRecoverMiddleware
:
def
process_spider_output
(
self
,
response
,
result
,
spider
):
for
r
in
result
:
r
[
'processed'
].
append
(
'{}.process_spider_output'
.
format
(
self
.
__class__
.
__name__
))
...
...
@@ -155,6 +174,9 @@ class GeneratorRecoverOutputChainMiddleware:
logging
.
info
(
'%s: %s caught'
,
method
,
exception
.
__class__
.
__name__
)
yield
{
'processed'
:
[
method
]}
class GeneratorDoNothingAfterRecoveryMiddleware(_GeneratorDoNothingMiddleware):
    """Sits after the recovering middleware in the chain; tags the
    recovered items and logs (without re-handling) any exception — all
    behavior inherited from _GeneratorDoNothingMiddleware.

    Reconstructed from a token-per-line diff scrape.
    """
    pass
# ================================================================================
class
TestSpiderMiddleware
(
TestCase
):
...
...
@@ -227,12 +249,26 @@ class TestSpiderMiddleware(TestCase):
"""
(4) An exception from a middleware's process_spider_output method should be sent
to the process_spider_exception method from the next middleware in the chain.
The result of the recovery by the process_spider_exception method should be handled
by the process_spider_output method from the next middleware.
The final item count should be 2 (one from the spider callback and one from the
process_spider_exception chain)
"""
log4
=
yield
self
.
crawl_log
(
GeneratorOutputChainSpider
)
self
.
assertIn
(
"'item_scraped_count': 2"
,
str
(
log4
))
self
.
assertIn
(
"GeneratorRecoverOutputChainMiddleware.process_spider_exception: LookupError caught"
,
str
(
log4
))
self
.
assertNotIn
(
"GeneratorFailOutputChainMiddleware.process_spider_exception: LookupError caught"
,
str
(
log4
))
self
.
assertIn
(
"{'processed': ['parse', 'GeneratorFailOutputChainMiddleware.process_spider_output', 'GeneratorRecoverOutputChainMiddleware.process_spider_output']}"
,
str
(
log4
))
self
.
assertIn
(
"{'processed': ['GeneratorRecoverOutputChainMiddleware.process_spider_exception']}"
,
str
(
log4
))
self
.
assertIn
(
"GeneratorRecoverMiddleware.process_spider_exception: LookupError caught"
,
str
(
log4
))
self
.
assertIn
(
"GeneratorDoNothingAfterFailureMiddleware.process_spider_exception: LookupError caught"
,
str
(
log4
))
self
.
assertNotIn
(
"GeneratorFailMiddleware.process_spider_exception: LookupError caught"
,
str
(
log4
))
self
.
assertNotIn
(
"GeneratorDoNothingAfterRecoveryMiddleware.process_spider_exception: LookupError caught"
,
str
(
log4
))
item_from_callback
=
{
'processed'
:
[
'parse-first-item'
,
'GeneratorFailMiddleware.process_spider_output'
,
'GeneratorDoNothingAfterFailureMiddleware.process_spider_output'
,
'GeneratorRecoverMiddleware.process_spider_output'
,
'GeneratorDoNothingAfterRecoveryMiddleware.process_spider_output'
]}
item_recovered
=
{
'processed'
:
[
'GeneratorRecoverMiddleware.process_spider_exception'
,
'GeneratorDoNothingAfterRecoveryMiddleware.process_spider_output'
]}
self
.
assertIn
(
str
(
item_from_callback
),
str
(
log4
))
self
.
assertIn
(
str
(
item_recovered
),
str
(
log4
))
self
.
assertNotIn
(
'parse-second-item'
,
str
(
log4
))
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录