Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
looyolo
scrapy
提交
2a6524ee
S
scrapy
项目概览
looyolo
/
scrapy
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
scrapy
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
2a6524ee
编写于
2月 27, 2016
作者:
N
nyov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Allow core Scheduler priority queue customization
上级
74158611
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
9 addition
and
5 deletion
+9
-5
scrapy/core/scheduler.py
scrapy/core/scheduler.py
+8
-5
scrapy/settings/default_settings.py
scrapy/settings/default_settings.py
+1
-0
未找到文件。
scrapy/core/scheduler.py
浏览文件 @
2a6524ee
...
...
@@ -3,7 +3,6 @@ import json
import
logging
from
os.path
import
join
,
exists
from
queuelib
import
PriorityQueue
from
scrapy.utils.reqser
import
request_to_dict
,
request_from_dict
from
scrapy.utils.misc
import
load_object
from
scrapy.utils.job
import
job_dir
...
...
@@ -13,9 +12,11 @@ logger = logging.getLogger(__name__)
class
Scheduler
(
object
):
def
__init__
(
self
,
dupefilter
,
jobdir
=
None
,
dqclass
=
None
,
mqclass
=
None
,
logunser
=
False
,
stats
=
None
):
def
__init__
(
self
,
dupefilter
,
jobdir
=
None
,
dqclass
=
None
,
mqclass
=
None
,
logunser
=
False
,
stats
=
None
,
pqclass
=
None
):
self
.
df
=
dupefilter
self
.
dqdir
=
self
.
_dqdir
(
jobdir
)
self
.
pqclass
=
pqclass
self
.
dqclass
=
dqclass
self
.
mqclass
=
mqclass
self
.
logunser
=
logunser
...
...
@@ -26,17 +27,19 @@ class Scheduler(object):
settings
=
crawler
.
settings
dupefilter_cls
=
load_object
(
settings
[
'DUPEFILTER_CLASS'
])
dupefilter
=
dupefilter_cls
.
from_settings
(
settings
)
pqclass
=
load_object
(
settings
[
'SCHEDULER_PRIORITY_QUEUE'
])
dqclass
=
load_object
(
settings
[
'SCHEDULER_DISK_QUEUE'
])
mqclass
=
load_object
(
settings
[
'SCHEDULER_MEMORY_QUEUE'
])
logunser
=
settings
.
getbool
(
'LOG_UNSERIALIZABLE_REQUESTS'
)
return
cls
(
dupefilter
,
job_dir
(
settings
),
dqclass
,
mqclass
,
logunser
,
crawler
.
stats
)
return
cls
(
dupefilter
,
jobdir
=
job_dir
(
settings
),
logunser
=
logunser
,
stats
=
crawler
.
stats
,
pqclass
=
pqclass
,
dqclass
=
dqclass
,
mqclass
=
mqclass
)
def
has_pending_requests
(
self
):
return
len
(
self
)
>
0
def
open
(
self
,
spider
):
self
.
spider
=
spider
self
.
mqs
=
PriorityQueue
(
self
.
_newmq
)
self
.
mqs
=
self
.
pqclass
(
self
.
_newmq
)
self
.
dqs
=
self
.
_dq
()
if
self
.
dqdir
else
None
return
self
.
df
.
open
()
...
...
@@ -112,7 +115,7 @@ class Scheduler(object):
prios
=
json
.
load
(
f
)
else
:
prios
=
()
q
=
PriorityQueue
(
self
.
_newdq
,
startprios
=
prios
)
q
=
self
.
pqclass
(
self
.
_newdq
,
startprios
=
prios
)
if
q
:
logger
.
info
(
"Resuming crawl (%(queuesize)d requests scheduled)"
,
{
'queuesize'
:
len
(
q
)},
extra
=
{
'spider'
:
self
.
spider
})
...
...
scrapy/settings/default_settings.py
浏览文件 @
2a6524ee
...
...
@@ -234,6 +234,7 @@ ROBOTSTXT_OBEY = False
SCHEDULER
=
'scrapy.core.scheduler.Scheduler'
SCHEDULER_DISK_QUEUE
=
'scrapy.squeues.PickleLifoDiskQueue'
SCHEDULER_MEMORY_QUEUE
=
'scrapy.squeues.LifoMemoryQueue'
SCHEDULER_PRIORITY_QUEUE
=
'queuelib.PriorityQueue'
SPIDER_LOADER_CLASS
=
'scrapy.spiderloader.SpiderLoader'
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录