From 846fd83512bb45335195b060d76b46060b4b6e3d Mon Sep 17 00:00:00 2001 From: IAlwaysBeCoding Date: Sat, 11 Nov 2017 18:30:01 -0500 Subject: [PATCH] removed commented out code, wrapped line to pep-8 and removed backslashes --- docs/topics/commands.rst | 3 +++ scrapy/commands/parse.py | 23 ++++++++++++++++++++++ tests/test_command_parse.py | 39 +++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/docs/topics/commands.rst b/docs/topics/commands.rst index dc8067d7e..07c69ddda 100644 --- a/docs/topics/commands.rst +++ b/docs/topics/commands.rst @@ -430,6 +430,9 @@ Supported options: * ``--callback`` or ``-c``: spider method to use as callback for parsing the response +* ``--meta`` or ``-m``: additional request meta that will be passed to the callback + request. This must be a valid JSON string. Example: --meta='{"foo" : "bar"}' + * ``--pipelines``: process items through pipelines * ``--rules`` or ``-r``: use :class:`~scrapy.spiders.CrawlSpider` diff --git a/scrapy/commands/parse.py b/scrapy/commands/parse.py index a90095146..69418a478 100644 --- a/scrapy/commands/parse.py +++ b/scrapy/commands/parse.py @@ -1,4 +1,5 @@ from __future__ import print_function +import json import logging from w3lib.url import is_url @@ -48,6 +49,8 @@ class Command(ScrapyCommand): help="use CrawlSpider rules to discover the callback") parser.add_option("-c", "--callback", dest="callback", help="use this callback for parsing, instead looking for a callback") + parser.add_option("-m", "--meta", dest="meta", + help="inject extra meta into the Request, it must be a valid raw json string") parser.add_option("-d", "--depth", dest="depth", type="int", default=1, help="maximum depth for parsing requests [default: %default]") parser.add_option("-v", "--verbose", dest="verbose", action="store_true", @@ -204,6 +207,10 @@ class Command(ScrapyCommand): req.callback = callback return requests + # Update request meta if any extra meta was passed through the --meta/-m options.
+ if opts.meta: + request.meta.update(opts.meta) + request.meta['_depth'] = 1 request.meta['_callback'] = request.callback request.callback = callback @@ -211,11 +218,27 @@ class Command(ScrapyCommand): def process_options(self, args, opts): ScrapyCommand.process_options(self, args, opts) + + self.process_spider_arguments(opts) + self.process_request_meta(opts) + + def process_spider_arguments(self, opts): + try: opts.spargs = arglist_to_dict(opts.spargs) except ValueError: raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False) + def process_request_meta(self, opts): + + if opts.meta: + try: + opts.meta = json.loads(opts.meta) + except ValueError: + raise UsageError("Invalid -m/--meta value, pass a valid json string to -m or --meta. " + "Example: --meta='{\"foo\" : \"bar\"}'", print_help=False) + + def run(self, args, opts): # parse arguments if not len(args) == 1 or not is_url(args[0]): diff --git a/tests/test_command_parse.py b/tests/test_command_parse.py index b6d6db9ee..66dd17110 100644 --- a/tests/test_command_parse.py +++ b/tests/test_command_parse.py @@ -29,6 +29,21 @@ class MySpider(scrapy.Spider): self.logger.debug('It Works!') return [scrapy.Item(), dict(foo='bar')] + def parse_request_with_meta(self, response): + foo = response.meta.get('foo', 'bar') + + if foo == 'bar': + self.logger.debug('It Does Not Work :(') + else: + self.logger.debug('It Works!') + + def parse_request_without_meta(self, response): + foo = response.meta.get('foo', 'bar') + + if foo == 'bar': + self.logger.debug('It Works!') + else: + self.logger.debug('It Does Not Work :(') class MyGoodCrawlSpider(CrawlSpider): name = 'goodcrawl{0}' @@ -84,6 +99,30 @@ ITEM_PIPELINES = {'%s.pipelines.MyPipeline': 1} self.url('/html')]) self.assertIn("DEBUG: It Works!", to_native_str(stderr)) + @defer.inlineCallbacks + def test_request_with_meta(self): + raw_json_string = '{"foo" : "baz"}' + _, _, stderr = yield self.execute(['--spider', self.spider_name, + '--meta',
raw_json_string, + '-c', 'parse_request_with_meta', + self.url('/html')]) + self.assertIn("DEBUG: It Works!", to_native_str(stderr)) + + _, _, stderr = yield self.execute(['--spider', self.spider_name, + '-m', raw_json_string, + '-c', 'parse_request_with_meta', + self.url('/html')]) + self.assertIn("DEBUG: It Works!", to_native_str(stderr)) + + + @defer.inlineCallbacks + def test_request_without_meta(self): + _, _, stderr = yield self.execute(['--spider', self.spider_name, + '-c', 'parse_request_without_meta', + self.url('/html')]) + self.assertIn("DEBUG: It Works!", to_native_str(stderr)) + + @defer.inlineCallbacks def test_pipelines(self): _, _, stderr = yield self.execute(['--spider', self.spider_name, -- GitLab