提交 846fd835 编写于 作者: I IAlwaysBeCoding

removed commented out code, wrapped line to pep-8 and removed backlashes

上级 f729d748
...@@ -430,6 +430,9 @@ Supported options: ...@@ -430,6 +430,9 @@ Supported options:
* ``--callback`` or ``-c``: spider method to use as callback for parsing the * ``--callback`` or ``-c``: spider method to use as callback for parsing the
response response
* ``--meta`` or ``-m``: additional request meta that will be passed to the callback
  request. This must be a valid JSON string. Example: --meta='{"foo" : "bar"}'
* ``--pipelines``: process items through pipelines * ``--pipelines``: process items through pipelines
* ``--rules`` or ``-r``: use :class:`~scrapy.spiders.CrawlSpider` * ``--rules`` or ``-r``: use :class:`~scrapy.spiders.CrawlSpider`
......
from __future__ import print_function from __future__ import print_function
import json
import logging import logging
from w3lib.url import is_url from w3lib.url import is_url
...@@ -48,6 +49,8 @@ class Command(ScrapyCommand): ...@@ -48,6 +49,8 @@ class Command(ScrapyCommand):
help="use CrawlSpider rules to discover the callback") help="use CrawlSpider rules to discover the callback")
parser.add_option("-c", "--callback", dest="callback", parser.add_option("-c", "--callback", dest="callback",
help="use this callback for parsing, instead looking for a callback") help="use this callback for parsing, instead looking for a callback")
parser.add_option("-m", "--meta", dest="meta",
help="inject extra meta into the Request, it must be a valid raw json string")
parser.add_option("-d", "--depth", dest="depth", type="int", default=1, parser.add_option("-d", "--depth", dest="depth", type="int", default=1,
help="maximum depth for parsing requests [default: %default]") help="maximum depth for parsing requests [default: %default]")
parser.add_option("-v", "--verbose", dest="verbose", action="store_true", parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
...@@ -204,6 +207,10 @@ class Command(ScrapyCommand): ...@@ -204,6 +207,10 @@ class Command(ScrapyCommand):
req.callback = callback req.callback = callback
return requests return requests
#update request meta if any extra meta was passed through the --meta/-m opts.
if opts.meta:
request.meta.update(opts.meta)
request.meta['_depth'] = 1 request.meta['_depth'] = 1
request.meta['_callback'] = request.callback request.meta['_callback'] = request.callback
request.callback = callback request.callback = callback
...@@ -211,11 +218,27 @@ class Command(ScrapyCommand): ...@@ -211,11 +218,27 @@ class Command(ScrapyCommand):
def process_options(self, args, opts):
    """Run the base option processing, then validate the parse-specific ones.

    Delegates spider-argument parsing and request-meta decoding to the
    dedicated helpers so each validation step stays self-contained.
    """
    ScrapyCommand.process_options(self, args, opts)
    self.process_spider_arguments(opts)
    self.process_request_meta(opts)
def process_spider_arguments(self, opts):
    """Turn the repeated ``-a NAME=VALUE`` options into a spider-args dict.

    Raises:
        UsageError: if any ``-a`` value is not of the form ``NAME=VALUE``.
    """
    try:
        parsed_args = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
    opts.spargs = parsed_args
def process_request_meta(self, opts):
    """Decode the raw ``--meta``/``-m`` option value from JSON in place.

    ``opts.meta`` arrives as the raw string given on the command line;
    on success it is replaced with the decoded object so later code can
    do ``request.meta.update(opts.meta)``. A falsy/absent value is left
    untouched.

    Raises:
        UsageError: if the string is not valid JSON.
    """
    if opts.meta:
        try:
            # json.loads raises ValueError (JSONDecodeError) on bad input.
            opts.meta = json.loads(opts.meta)
        except ValueError:
            # Implicit string concatenation inside the parentheses — the
            # trailing line-continuation backslash was redundant and removed.
            raise UsageError("Invalid -m/--meta value, pass a valid json string to -m or --meta. "
                             "Example: --meta='{\"foo\" : \"bar\"}'", print_help=False)
def run(self, args, opts): def run(self, args, opts):
# parse arguments # parse arguments
if not len(args) == 1 or not is_url(args[0]): if not len(args) == 1 or not is_url(args[0]):
......
...@@ -29,6 +29,21 @@ class MySpider(scrapy.Spider): ...@@ -29,6 +29,21 @@ class MySpider(scrapy.Spider):
self.logger.debug('It Works!') self.logger.debug('It Works!')
return [scrapy.Item(), dict(foo='bar')] return [scrapy.Item(), dict(foo='bar')]
def parse_request_with_meta(self, response):
    """Log success only when injected meta overrode the default 'foo' value."""
    meta_foo = response.meta.get('foo', 'bar')
    message = 'It Does Not Work :(' if meta_foo == 'bar' else 'It Works!'
    self.logger.debug(message)
def parse_request_without_meta(self, response):
    """Log success only when 'foo' still resolves to its default value."""
    meta_foo = response.meta.get('foo', 'bar')
    message = 'It Works!' if meta_foo == 'bar' else 'It Does Not Work :('
    self.logger.debug(message)
class MyGoodCrawlSpider(CrawlSpider): class MyGoodCrawlSpider(CrawlSpider):
name = 'goodcrawl{0}' name = 'goodcrawl{0}'
...@@ -84,6 +99,30 @@ ITEM_PIPELINES = {'%s.pipelines.MyPipeline': 1} ...@@ -84,6 +99,30 @@ ITEM_PIPELINES = {'%s.pipelines.MyPipeline': 1}
self.url('/html')]) self.url('/html')])
self.assertIn("DEBUG: It Works!", to_native_str(stderr)) self.assertIn("DEBUG: It Works!", to_native_str(stderr))
@defer.inlineCallbacks
def test_request_with_meta(self):
    """Both the long (--meta) and short (-m) flags must reach the callback."""
    meta_json = '{"foo" : "baz"}'
    # Same two invocations as before, long form first, then the alias.
    for meta_flag in ('--meta', '-m'):
        _, _, stderr = yield self.execute(['--spider', self.spider_name,
                                           meta_flag, meta_json,
                                           '-c', 'parse_request_with_meta',
                                           self.url('/html')])
        self.assertIn("DEBUG: It Works!", to_native_str(stderr))
@defer.inlineCallbacks
def test_request_without_meta(self):
    """Without --meta the callback must see only the default meta values."""
    result = yield self.execute(['--spider', self.spider_name,
                                 '-c', 'parse_request_without_meta',
                                 self.url('/html')])
    stderr = result[2]
    self.assertIn("DEBUG: It Works!", to_native_str(stderr))
@defer.inlineCallbacks @defer.inlineCallbacks
def test_pipelines(self): def test_pipelines(self):
_, _, stderr = yield self.execute(['--spider', self.spider_name, _, _, stderr = yield self.execute(['--spider', self.spider_name,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册