looyolo / scrapy
Commit 5daa1477
Authored September 01, 2014 by Daniel Graña

Merge branch 'Curita-per-spider-settings'

Parents: ccde3317, c2592b39

Showing 8 changed files with 73 additions and 8 deletions (+73 −8)
Changed files:

  docs/topics/api.rst          (+1  −0)
  docs/topics/settings.rst     (+14 −6)
  docs/topics/spiders.rst      (+9  −0)
  scrapy/crawler.py            (+6  −1)
  scrapy/settings/__init__.py  (+1  −0)
  scrapy/spider.py             (+5  −0)
  tests/test_crawler.py        (+26 −1)
  tests/test_spider.py         (+11 −0)
docs/topics/api.rst
@@ -172,6 +172,7 @@ Settings API
     'default': 0,
     'command': 10,
     'project': 20,
+    'spider': 30,
     'cmdline': 40,
 }
docs/topics/settings.rst
@@ -36,9 +36,10 @@ different precedence. Here is the list of them in decreasing order of
 precedence:

  1. Command line options (most precedence)
- 2. Project settings module
- 3. Default settings per-command
- 4. Default global settings (less precedence)
+ 2. Settings per-spider
+ 3. Project settings module
+ 4. Default settings per-command
+ 5. Default global settings (less precedence)

 The population of these settings sources is taken care of internally, but a
 manual handling is possible using API calls. See the

@@ -59,14 +60,21 @@ Example::

     scrapy crawl myspider -s LOG_FILE=scrapy.log

-2. Project settings module
+2. Settings per-spider
 ----------------------
+
+Spiders (See the :ref:`topics-spiders` chapter for reference) can define their
+own settings that will take precedence and override the project ones. They can
+do so by setting their :attr:`scrapy.spider.Spider.custom_settings` attribute.
+
+3. Project settings module
+--------------------------

 The project settings module is the standard configuration file for your Scrapy
 project. It's where most of your custom settings will be populated. For
 example:: ``myproject.settings``.

-3. Default settings per-command
+4. Default settings per-command
 -------------------------------

 Each :doc:`Scrapy tool </topics/commands>` command can have its own default

@@ -74,7 +82,7 @@ settings, which override the global default settings. Those custom command
 settings are specified in the ``default_settings`` attribute of the command
 class.

-4. Default global settings
+5. Default global settings
 --------------------------

 The global defaults are located in the ``scrapy.settings.default_settings``
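The five sources in the list above resolve through numeric priorities (the `SETTINGS_PRIORITIES` dict shown in the api.rst hunk: 0 through 40). The following is a minimal, self-contained sketch of that resolution scheme; `TinySettings` is an illustrative stand-in, not Scrapy's actual `Settings` class.

```python
# Illustrative sketch of priority-based settings resolution. It mirrors the
# idea behind scrapy.settings.SETTINGS_PRIORITIES, but is not Scrapy code.
SETTINGS_PRIORITIES = {
    'default': 0,
    'command': 10,
    'project': 20,
    'spider': 30,
    'cmdline': 40,
}

class TinySettings:
    """Keep, for each key, the value set with the highest priority."""

    def __init__(self):
        self._values = {}  # key -> (value, numeric priority)

    def set(self, key, value, priority='project'):
        prio = SETTINGS_PRIORITIES[priority]
        # Later writes win ties; lower-priority writes are ignored.
        if key not in self._values or prio >= self._values[key][1]:
            self._values[key] = (value, prio)

    def setdict(self, values, priority='project'):
        for key, value in values.items():
            self.set(key, value, priority)

    def get(self, key, default=None):
        return self._values.get(key, (default,))[0]

s = TinySettings()
s.setdict({'DOWNLOAD_DELAY': 0, 'LOG_LEVEL': 'DEBUG'}, priority='project')
s.setdict({'DOWNLOAD_DELAY': 2}, priority='spider')   # per-spider beats project
s.setdict({'LOG_LEVEL': 'INFO'}, priority='cmdline')  # cmdline beats everything
print(s.get('DOWNLOAD_DELAY'), s.get('LOG_LEVEL'))    # 2 INFO
```

Note the per-key granularity: a per-spider override of `DOWNLOAD_DELAY` does not prevent a command-line `LOG_LEVEL` from applying.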
docs/topics/spiders.rst
@@ -133,6 +133,15 @@ Spider
     listed here. The subsequent URLs will be generated successively from data
     contained in the start URLs.

+   .. attribute:: custom_settings
+
+       A dictionary of settings that will be overridden from the project wide
+       configuration when running this spider. It must be defined as a class
+       attribute since the settings are updated before instantiation.
+
+       For a list of available built-in settings see:
+       :ref:`topics-settings-ref`.
+
    .. attribute:: crawler

        This attribute is set by the :meth:`from_crawler` class method after
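The doc addition stresses that `custom_settings` must be a class attribute because settings are merged before the spider is instantiated. A hypothetical, Scrapy-free sketch of that constraint (the spider name and setting keys are illustrative):

```python
# Scrapy-free sketch: settings are merged from the *class*, before any
# spider instance exists, so custom_settings set in __init__ would be too late.
class Spider:
    custom_settings = None  # default: no per-spider overrides

    @classmethod
    def update_settings(cls, settings):
        # Merge class-level overrides into a plain dict of settings.
        settings.update(cls.custom_settings or {})

class BooksSpider(Spider):          # hypothetical spider
    name = 'books'
    custom_settings = {             # class attribute, read pre-instantiation
        'DOWNLOAD_DELAY': 2,
        'CONCURRENT_REQUESTS': 1,
    }

project_settings = {'DOWNLOAD_DELAY': 0, 'LOG_LEVEL': 'INFO'}
# Called on the class itself; no BooksSpider() has been constructed yet.
BooksSpider.update_settings(project_settings)
print(project_settings['DOWNLOAD_DELAY'])  # 2
```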
scrapy/crawler.py
@@ -96,7 +96,12 @@ class CrawlerRunner(object):

     def _create_crawler(self, spidercls):
         if isinstance(spidercls, six.string_types):
             spidercls = self.spiders.load(spidercls)
-        crawler = Crawler(spidercls, self.settings.frozencopy())
+
+        crawler_settings = self.settings.copy()
+        spidercls.update_settings(crawler_settings)
+        crawler_settings.freeze()
+        crawler = Crawler(spidercls, crawler_settings)
         return crawler

     def stop(self):
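The key change in `_create_crawler` is the ordering: instead of handing the crawler a frozen copy of the runner settings directly, it now takes a mutable copy, lets the spider class inject its `custom_settings`, and only then freezes. A simplified stand-in (priorities omitted; `TinySettings` is not Scrapy's `Settings` class) showing why the freeze must come last:

```python
# Sketch of the copy -> update -> freeze ordering from _create_crawler.
class TinySettings:
    def __init__(self, values=None):
        self._values = dict(values or {})
        self.frozen = False

    def copy(self):
        # Fresh mutable copy; the runner's own settings stay untouched.
        return TinySettings(self._values)

    def setdict(self, values, priority='spider'):
        if self.frozen:
            raise AssertionError("can't modify frozen settings")
        self._values.update(values)  # priorities omitted for brevity

    def freeze(self):
        self.frozen = True

    def get(self, key):
        return self._values.get(key)

class SomeSpider:                       # hypothetical spider class
    custom_settings = {'DOWNLOAD_DELAY': 2}

    @classmethod
    def update_settings(cls, settings):
        settings.setdict(cls.custom_settings or {}, priority='spider')

runner_settings = TinySettings({'DOWNLOAD_DELAY': 0})
crawler_settings = runner_settings.copy()   # 1. copy (still mutable)
SomeSpider.update_settings(crawler_settings)  # 2. apply per-spider overrides
crawler_settings.freeze()                   # 3. freeze before the Crawler gets it
```

With the old `frozencopy()` approach, step 2 would have failed: the spider's overrides cannot be applied to an already-frozen object.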
scrapy/settings/__init__.py
@@ -15,6 +15,7 @@ SETTINGS_PRIORITIES = {
     'default': 0,
     'command': 10,
     'project': 20,
+    'spider': 30,
     'cmdline': 40,
 }
scrapy/spider.py
@@ -20,6 +20,7 @@ class Spider(object_ref):
     """

     name = None
+    custom_settings = None

     def __init__(self, name=None, **kwargs):
         if name is not None:

@@ -66,6 +67,10 @@ class Spider(object_ref):
     def parse(self, response):
         raise NotImplementedError

+    @classmethod
+    def update_settings(cls, settings):
+        settings.setdict(cls.custom_settings or {}, priority='spider')
+
     @classmethod
     def handles_request(cls, request):
         return url_is_from_spider(request.url, cls)
tests/test_crawler.py

 import warnings
 import unittest

-from scrapy.crawler import Crawler
+from twisted.internet import defer
+from scrapy.crawler import Crawler, CrawlerRunner
 from scrapy.settings import Settings
+from scrapy.utils.spider import DefaultSpider
 from scrapy.utils.misc import load_object

@@ -22,3 +24,26 @@ class CrawlerTestCase(unittest.TestCase):
             self.crawler.spiders
             self.assertEqual(len(w), 1, "Warn deprecated access only once")
+
+
+class CrawlerRunnerTest(unittest.TestCase):
+
+    def setUp(self):
+        self.crawler_runner = CrawlerRunner(Settings())
+
+    @defer.inlineCallbacks
+    def test_populate_spidercls_settings(self):
+        spider_settings = {'TEST1': 'spider', 'TEST2': 'spider'}
+        project_settings = {'TEST1': 'project', 'TEST3': 'project'}
+
+        class CustomSettingsSpider(DefaultSpider):
+            custom_settings = spider_settings
+
+        self.crawler_runner.settings.setdict(project_settings,
+                                             priority='project')
+        yield self.crawler_runner.crawl(CustomSettingsSpider)
+        crawler = self.crawler_runner.crawlers.pop()
+
+        self.assertEqual(crawler.settings.get('TEST1'), 'spider')
+        self.assertEqual(crawler.settings.get('TEST2'), 'spider')
+        self.assertEqual(crawler.settings.get('TEST3'), 'project')
tests/test_spider.py
@@ -10,6 +10,7 @@ except ImportError:

 from scrapy import signals
 from scrapy.spider import Spider, BaseSpider
+from scrapy.settings import Settings
 from scrapy.http import Request, Response, TextResponse, XmlResponse, HtmlResponse
 from scrapy.contrib.spiders.init import InitSpider
 from scrapy.contrib.spiders import CrawlSpider, Rule, XMLFeedSpider, \

@@ -92,6 +93,16 @@ class SpiderTest(unittest.TestCase):
                                spider=spider, reason=None)
         self.assertTrue(spider.closed_called)

+    def test_update_settings(self):
+        spider_settings = {'TEST1': 'spider', 'TEST2': 'spider'}
+        project_settings = {'TEST1': 'project', 'TEST3': 'project'}
+        self.spider_class.custom_settings = spider_settings
+        settings = Settings(project_settings, priority='project')
+        self.spider_class.update_settings(settings)
+        self.assertEqual(settings.get('TEST1'), 'spider')
+        self.assertEqual(settings.get('TEST2'), 'spider')
+        self.assertEqual(settings.get('TEST3'), 'project')
+

 class InitSpiderTest(SpiderTest):