Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
looyolo
scrapy
提交
de82ca85
S
scrapy
项目概览
looyolo
/
scrapy
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
scrapy
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
de82ca85
编写于
2月 08, 2021
作者:
A
Adrián Chaves
提交者:
GitHub
2月 08, 2021
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4982 from wRAR/refactor-spidermiddlewaremanager
Refactor SpiderMiddlewareManager.scrape_response.
上级
904a5013
1e9b52c3
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
77 addition
and
71 deletion
+77
-71
scrapy/core/spidermw.py
scrapy/core/spidermw.py
+77
-71
未找到文件。
scrapy/core/spidermw.py
浏览文件 @
de82ca85
...
...
@@ -41,86 +41,92 @@ class SpiderMiddlewareManager(MiddlewareManager):
process_spider_exception
=
getattr
(
mw
,
'process_spider_exception'
,
None
)
self
.
methods
[
'process_spider_exception'
].
appendleft
(
process_spider_exception
)
def
scrape_response
(
self
,
scrape_func
,
response
,
request
,
spider
):
def
process_spider_input
(
response
):
for
method
in
self
.
methods
[
'process_spider_input'
]:
try
:
result
=
method
(
response
=
response
,
spider
=
spider
)
if
result
is
not
None
:
msg
=
(
f
"Middleware
{
_fname
(
method
)
}
must return None "
f
"or raise an exception, got
{
type
(
result
)
}
"
)
raise
_InvalidOutput
(
msg
)
except
_InvalidOutput
:
raise
except
Exception
:
return
scrape_func
(
Failure
(),
request
,
spider
)
return
scrape_func
(
response
,
request
,
spider
)
def
_evaluate_iterable
(
iterable
,
exception_processor_index
,
recover_to
):
def
_process_spider_input
(
self
,
scrape_func
,
response
,
request
,
spider
):
for
method
in
self
.
methods
[
'process_spider_input'
]:
try
:
for
r
in
iterable
:
yield
r
except
Exception
as
ex
:
exception_result
=
process_spider_exception
(
Failure
(
ex
),
exception_processor_index
)
if
isinstance
(
exception_result
,
Failure
):
raise
recover_to
.
extend
(
exception_result
)
def
process_spider_exception
(
_failure
,
start_index
=
0
):
exception
=
_failure
.
value
# don't handle _InvalidOutput exception
if
isinstance
(
exception
,
_InvalidOutput
):
return
_failure
method_list
=
islice
(
self
.
methods
[
'process_spider_exception'
],
start_index
,
None
)
for
method_index
,
method
in
enumerate
(
method_list
,
start
=
start_index
):
if
method
is
None
:
continue
result
=
method
(
response
=
response
,
exception
=
exception
,
spider
=
spider
)
if
_isiterable
(
result
):
# stop exception handling by handing control over to the
# process_spider_output chain if an iterable has been returned
return
process_spider_output
(
result
,
method_index
+
1
)
elif
result
is
None
:
continue
else
:
result
=
method
(
response
=
response
,
spider
=
spider
)
if
result
is
not
None
:
msg
=
(
f
"Middleware
{
_fname
(
method
)
}
must return None "
f
"or
an iterable
, got
{
type
(
result
)
}
"
)
f
"or
raise an exception
, got
{
type
(
result
)
}
"
)
raise
_InvalidOutput
(
msg
)
except
_InvalidOutput
:
raise
except
Exception
:
return
scrape_func
(
Failure
(),
request
,
spider
)
return
scrape_func
(
response
,
request
,
spider
)
def
_evaluate_iterable
(
self
,
response
,
spider
,
iterable
,
exception_processor_index
,
recover_to
):
try
:
for
r
in
iterable
:
yield
r
except
Exception
as
ex
:
exception_result
=
self
.
_process_spider_exception
(
response
,
spider
,
Failure
(
ex
),
exception_processor_index
)
if
isinstance
(
exception_result
,
Failure
):
raise
recover_to
.
extend
(
exception_result
)
def
_process_spider_exception
(
self
,
response
,
spider
,
_failure
,
start_index
=
0
):
exception
=
_failure
.
value
# don't handle _InvalidOutput exception
if
isinstance
(
exception
,
_InvalidOutput
):
return
_failure
method_list
=
islice
(
self
.
methods
[
'process_spider_exception'
],
start_index
,
None
)
for
method_index
,
method
in
enumerate
(
method_list
,
start
=
start_index
):
if
method
is
None
:
continue
result
=
method
(
response
=
response
,
exception
=
exception
,
spider
=
spider
)
if
_isiterable
(
result
):
# stop exception handling by handing control over to the
# process_spider_output chain if an iterable has been returned
return
self
.
_process_spider_output
(
response
,
spider
,
result
,
method_index
+
1
)
elif
result
is
None
:
continue
else
:
msg
=
(
f
"Middleware
{
_fname
(
method
)
}
must return None "
f
"or an iterable, got
{
type
(
result
)
}
"
)
raise
_InvalidOutput
(
msg
)
return
_failure
def
_process_spider_output
(
self
,
response
,
spider
,
result
,
start_index
=
0
):
# items in this iterable do not need to go through the process_spider_output
# chain, they went through it already from the process_spider_exception method
recovered
=
MutableChain
()
method_list
=
islice
(
self
.
methods
[
'process_spider_output'
],
start_index
,
None
)
for
method_index
,
method
in
enumerate
(
method_list
,
start
=
start_index
):
if
method
is
None
:
continue
try
:
# might fail directly if the output value is not a generator
result
=
method
(
response
=
response
,
result
=
result
,
spider
=
spider
)
except
Exception
as
ex
:
exception_result
=
self
.
_process_spider_exception
(
response
,
spider
,
Failure
(
ex
),
method_index
+
1
)
if
isinstance
(
exception_result
,
Failure
):
raise
return
exception_result
if
_isiterable
(
result
):
result
=
self
.
_evaluate_iterable
(
response
,
spider
,
result
,
method_index
+
1
,
recovered
)
else
:
msg
=
(
f
"Middleware
{
_fname
(
method
)
}
must return an "
f
"iterable, got
{
type
(
result
)
}
"
)
raise
_InvalidOutput
(
msg
)
def
process_spider_output
(
result
,
start_index
=
0
):
# items in this iterable do not need to go through the process_spider_output
# chain, they went through it already from the process_spider_exception method
recovered
=
MutableChain
()
method_list
=
islice
(
self
.
methods
[
'process_spider_output'
],
start_index
,
None
)
for
method_index
,
method
in
enumerate
(
method_list
,
start
=
start_index
):
if
method
is
None
:
continue
try
:
# might fail directly if the output value is not a generator
result
=
method
(
response
=
response
,
result
=
result
,
spider
=
spider
)
except
Exception
as
ex
:
exception_result
=
process_spider_exception
(
Failure
(
ex
),
method_index
+
1
)
if
isinstance
(
exception_result
,
Failure
):
raise
return
exception_result
if
_isiterable
(
result
):
result
=
_evaluate_iterable
(
result
,
method_index
+
1
,
recovered
)
else
:
msg
=
(
f
"Middleware
{
_fname
(
method
)
}
must return an "
f
"iterable, got
{
type
(
result
)
}
"
)
raise
_InvalidOutput
(
msg
)
return
MutableChain
(
result
,
recovered
)
return
MutableChain
(
result
,
recovered
)
def
_process_callback_output
(
self
,
response
,
spider
,
result
):
recovered
=
MutableChain
()
result
=
self
.
_evaluate_iterable
(
response
,
spider
,
result
,
0
,
recovered
)
return
MutableChain
(
self
.
_process_spider_output
(
response
,
spider
,
result
),
recovered
)
def
scrape_response
(
self
,
scrape_func
,
response
,
request
,
spider
):
def
process_callback_output
(
result
):
recovered
=
MutableChain
()
result
=
_evaluate_iterable
(
result
,
0
,
recovered
)
return
MutableChain
(
process_spider_output
(
result
),
recovered
)
return
self
.
_process_callback_output
(
response
,
spider
,
result
)
def
process_spider_exception
(
_failure
):
return
self
.
_process_spider_exception
(
response
,
spider
,
_failure
)
dfd
=
mustbe_deferred
(
process_spider_input
,
response
)
dfd
=
mustbe_deferred
(
self
.
_process_spider_input
,
scrape_func
,
response
,
request
,
spider
)
dfd
.
addCallbacks
(
callback
=
process_callback_output
,
errback
=
process_spider_exception
)
return
dfd
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录