Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
looyolo
scrapy
提交
846fd835
S
scrapy
项目概览
looyolo
/
scrapy
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
scrapy
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
846fd835
编写于
11月 11, 2017
作者:
I
IAlwaysBeCoding
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
removed commented-out code, wrapped lines to PEP 8 and removed backslashes
上级
f729d748
变更
3
显示空白变更内容
内联
并排
Showing
3 changed files
with
65 additions
and
0 deletions
+65
-0
docs/topics/commands.rst
docs/topics/commands.rst
+3
-0
scrapy/commands/parse.py
scrapy/commands/parse.py
+23
-0
tests/test_command_parse.py
tests/test_command_parse.py
+39
-0
未找到文件。
docs/topics/commands.rst
浏览文件 @
846fd835
...
@@ -430,6 +430,9 @@ Supported options:
...
@@ -430,6 +430,9 @@ Supported options:
* ``--callback`` or ``-c``: spider method to use as callback for parsing the
* ``--callback`` or ``-c``: spider method to use as callback for parsing the
response
response
* ``--meta`` or ``-m``: additional request meta that will be passed to the callback
request. This must be a valid JSON string. Example: --meta='{"foo" : "bar"}'
* ``--pipelines``: process items through pipelines
* ``--pipelines``: process items through pipelines
* ``--rules`` or ``-r``: use :class:`~scrapy.spiders.CrawlSpider`
* ``--rules`` or ``-r``: use :class:`~scrapy.spiders.CrawlSpider`
...
...
scrapy/commands/parse.py
浏览文件 @
846fd835
from
__future__
import
print_function
from
__future__
import
print_function
import
json
import
logging
import
logging
from
w3lib.url
import
is_url
from
w3lib.url
import
is_url
...
@@ -48,6 +49,8 @@ class Command(ScrapyCommand):
...
@@ -48,6 +49,8 @@ class Command(ScrapyCommand):
help
=
"use CrawlSpider rules to discover the callback"
)
help
=
"use CrawlSpider rules to discover the callback"
)
parser
.
add_option
(
"-c"
,
"--callback"
,
dest
=
"callback"
,
parser
.
add_option
(
"-c"
,
"--callback"
,
dest
=
"callback"
,
help
=
"use this callback for parsing, instead looking for a callback"
)
help
=
"use this callback for parsing, instead looking for a callback"
)
parser
.
add_option
(
"-m"
,
"--meta"
,
dest
=
"meta"
,
help
=
"inject extra meta into the Request, it must be a valid raw json string"
)
parser
.
add_option
(
"-d"
,
"--depth"
,
dest
=
"depth"
,
type
=
"int"
,
default
=
1
,
parser
.
add_option
(
"-d"
,
"--depth"
,
dest
=
"depth"
,
type
=
"int"
,
default
=
1
,
help
=
"maximum depth for parsing requests [default: %default]"
)
help
=
"maximum depth for parsing requests [default: %default]"
)
parser
.
add_option
(
"-v"
,
"--verbose"
,
dest
=
"verbose"
,
action
=
"store_true"
,
parser
.
add_option
(
"-v"
,
"--verbose"
,
dest
=
"verbose"
,
action
=
"store_true"
,
...
@@ -204,6 +207,10 @@ class Command(ScrapyCommand):
...
@@ -204,6 +207,10 @@ class Command(ScrapyCommand):
req
.
callback
=
callback
req
.
callback
=
callback
return
requests
return
requests
#update request meta if any extra meta was passed through the --meta/-m opts.
if
opts
.
meta
:
request
.
meta
.
update
(
opts
.
meta
)
request
.
meta
[
'_depth'
]
=
1
request
.
meta
[
'_depth'
]
=
1
request
.
meta
[
'_callback'
]
=
request
.
callback
request
.
meta
[
'_callback'
]
=
request
.
callback
request
.
callback
=
callback
request
.
callback
=
callback
...
@@ -211,11 +218,27 @@ class Command(ScrapyCommand):
...
@@ -211,11 +218,27 @@ class Command(ScrapyCommand):
def
process_options
(
self
,
args
,
opts
):
def
process_options
(
self
,
args
,
opts
):
ScrapyCommand
.
process_options
(
self
,
args
,
opts
)
ScrapyCommand
.
process_options
(
self
,
args
,
opts
)
self
.
process_spider_arguments
(
opts
)
self
.
process_request_meta
(
opts
)
def
process_spider_arguments
(
self
,
opts
):
try
:
try
:
opts
.
spargs
=
arglist_to_dict
(
opts
.
spargs
)
opts
.
spargs
=
arglist_to_dict
(
opts
.
spargs
)
except
ValueError
:
except
ValueError
:
raise
UsageError
(
"Invalid -a value, use -a NAME=VALUE"
,
print_help
=
False
)
raise
UsageError
(
"Invalid -a value, use -a NAME=VALUE"
,
print_help
=
False
)
def
process_request_meta
(
self
,
opts
):
if
opts
.
meta
:
try
:
opts
.
meta
=
json
.
loads
(
opts
.
meta
)
except
ValueError
:
raise
UsageError
(
"Invalid -m/--meta value, pass a valid json string to -m or --meta. "
\
"Example: --meta='{
\"
foo
\"
:
\"
bar
\"
}'"
,
print_help
=
False
)
def
run
(
self
,
args
,
opts
):
def
run
(
self
,
args
,
opts
):
# parse arguments
# parse arguments
if
not
len
(
args
)
==
1
or
not
is_url
(
args
[
0
]):
if
not
len
(
args
)
==
1
or
not
is_url
(
args
[
0
]):
...
...
tests/test_command_parse.py
浏览文件 @
846fd835
...
@@ -29,6 +29,21 @@ class MySpider(scrapy.Spider):
...
@@ -29,6 +29,21 @@ class MySpider(scrapy.Spider):
self.logger.debug('It Works!')
self.logger.debug('It Works!')
return [scrapy.Item(), dict(foo='bar')]
return [scrapy.Item(), dict(foo='bar')]
def parse_request_with_meta(self, response):
foo = response.meta.get('foo', 'bar')
if foo == 'bar':
self.logger.debug('It Does Not Work :(')
else:
self.logger.debug('It Works!')
def parse_request_without_meta(self, response):
foo = response.meta.get('foo', 'bar')
if foo == 'bar':
self.logger.debug('It Works!')
else:
self.logger.debug('It Does Not Work :(')
class MyGoodCrawlSpider(CrawlSpider):
class MyGoodCrawlSpider(CrawlSpider):
name = 'goodcrawl{0}'
name = 'goodcrawl{0}'
...
@@ -84,6 +99,30 @@ ITEM_PIPELINES = {'%s.pipelines.MyPipeline': 1}
...
@@ -84,6 +99,30 @@ ITEM_PIPELINES = {'%s.pipelines.MyPipeline': 1}
self
.
url
(
'/html'
)])
self
.
url
(
'/html'
)])
self
.
assertIn
(
"DEBUG: It Works!"
,
to_native_str
(
stderr
))
self
.
assertIn
(
"DEBUG: It Works!"
,
to_native_str
(
stderr
))
@
defer
.
inlineCallbacks
def
test_request_with_meta
(
self
):
raw_json_string
=
'{"foo" : "baz"}'
_
,
_
,
stderr
=
yield
self
.
execute
([
'--spider'
,
self
.
spider_name
,
'--meta'
,
raw_json_string
,
'-c'
,
'parse_request_with_meta'
,
self
.
url
(
'/html'
)])
self
.
assertIn
(
"DEBUG: It Works!"
,
to_native_str
(
stderr
))
_
,
_
,
stderr
=
yield
self
.
execute
([
'--spider'
,
self
.
spider_name
,
'-m'
,
raw_json_string
,
'-c'
,
'parse_request_with_meta'
,
self
.
url
(
'/html'
)])
self
.
assertIn
(
"DEBUG: It Works!"
,
to_native_str
(
stderr
))
@
defer
.
inlineCallbacks
def
test_request_without_meta
(
self
):
_
,
_
,
stderr
=
yield
self
.
execute
([
'--spider'
,
self
.
spider_name
,
'-c'
,
'parse_request_without_meta'
,
self
.
url
(
'/html'
)])
self
.
assertIn
(
"DEBUG: It Works!"
,
to_native_str
(
stderr
))
@
defer
.
inlineCallbacks
@
defer
.
inlineCallbacks
def
test_pipelines
(
self
):
def
test_pipelines
(
self
):
_
,
_
,
stderr
=
yield
self
.
execute
([
'--spider'
,
self
.
spider_name
,
_
,
_
,
stderr
=
yield
self
.
execute
([
'--spider'
,
self
.
spider_name
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录