Commit 5daa1477 authored by Daniel Graña

Merge branch 'Curita-per-spider-settings'

......@@ -172,6 +172,7 @@ Settings API
'default': 0,
'command': 10,
'project': 20,
'spider': 30,
'cmdline': 40,
}
......
......@@ -36,9 +36,10 @@ different precedence. Here is the list of them in decreasing order of
precedence:
1. Command line options (most precedence)
2. Project settings module
3. Default settings per-command
4. Default global settings (less precedence)
2. Settings per-spider
3. Project settings module
4. Default settings per-command
5. Default global settings (less precedence)
The population of these settings sources is taken care of internally, but a
manual handling is possible using API calls. See the
......@@ -59,14 +60,21 @@ Example::
scrapy crawl myspider -s LOG_FILE=scrapy.log
2. Project settings module
2. Settings per-spider
----------------------
Spiders (See the :ref:`topics-spiders` chapter for reference) can define their
own settings that will take precedence and override the project ones. They can
do so by setting their :attr:`scrapy.spider.Spider.custom_settings` attribute.
3. Project settings module
--------------------------
The project settings module is the standard configuration file for your Scrapy
project. It's where most of your custom settings will be populated. For
example: ``myproject.settings``.
3. Default settings per-command
4. Default settings per-command
-------------------------------
Each :doc:`Scrapy tool </topics/commands>` command can have its own default
......@@ -74,7 +82,7 @@ settings, which override the global default settings. Those custom command
settings are specified in the ``default_settings`` attribute of the command
class.
4. Default global settings
5. Default global settings
--------------------------
The global defaults are located in the ``scrapy.settings.default_settings``
......
......@@ -133,6 +133,15 @@ Spider
listed here. The subsequent URLs will be generated successively from data
contained in the start URLs.
.. attribute:: custom_settings
A dictionary of settings that will override the project wide
configuration when running this spider. It must be defined as a class
attribute since the settings are updated before instantiation.
For a list of available built-in settings see:
:ref:`topics-settings-ref`.
.. attribute:: crawler
This attribute is set by the :meth:`from_crawler` class method after
......
......@@ -96,7 +96,12 @@ class CrawlerRunner(object):
def _create_crawler(self, spidercls):
    """Build a :class:`Crawler` for *spidercls*.

    *spidercls* may be a spider class or a spider name; names are
    resolved through the spider loader. The crawler receives a frozen
    copy of the runner settings with the spider's ``custom_settings``
    layered on top (at 'spider' priority, via ``update_settings``).
    """
    if isinstance(spidercls, six.string_types):
        spidercls = self.spiders.load(spidercls)
    # Start from a mutable copy so per-spider settings can be applied
    # before the crawler sees an immutable (frozen) settings object.
    # (The old one-shot self.settings.frozencopy() path is gone: it left
    # no room to merge custom_settings.)
    crawler_settings = self.settings.copy()
    spidercls.update_settings(crawler_settings)
    crawler_settings.freeze()
    crawler = Crawler(spidercls, crawler_settings)
    return crawler
def stop(self):
......
......@@ -15,6 +15,7 @@ SETTINGS_PRIORITIES = {
'default': 0,
'command': 10,
'project': 20,
'spider': 30,
'cmdline': 40,
}
......
......@@ -20,6 +20,7 @@ class Spider(object_ref):
"""
name = None
custom_settings = None
def __init__(self, name=None, **kwargs):
if name is not None:
......@@ -66,6 +67,10 @@ class Spider(object_ref):
def parse(self, response):
raise NotImplementedError
@classmethod
def update_settings(cls, settings):
    """Merge this spider class' ``custom_settings`` into *settings*.

    The values are applied at 'spider' priority, so they override
    project-level settings but not command-line ones. A spider with no
    ``custom_settings`` (``None``) contributes nothing.
    """
    custom = cls.custom_settings if cls.custom_settings else {}
    settings.setdict(custom, priority='spider')
@classmethod
def handles_request(cls, request):
    # Delegate to url_is_from_spider to decide whether this spider class
    # should handle the given request's URL.
    return url_is_from_spider(request.url, cls)
......
import warnings
import unittest
from scrapy.crawler import Crawler
from twisted.internet import defer
from scrapy.crawler import Crawler, CrawlerRunner
from scrapy.settings import Settings
from scrapy.utils.spider import DefaultSpider
from scrapy.utils.misc import load_object
......@@ -22,3 +24,26 @@ class CrawlerTestCase(unittest.TestCase):
self.crawler.spiders
self.assertEqual(len(w), 1, "Warn deprecated access only once")
class CrawlerRunnerTest(unittest.TestCase):
    """Checks that CrawlerRunner wires per-spider settings into crawlers."""

    def setUp(self):
        self.crawler_runner = CrawlerRunner(Settings())

    @defer.inlineCallbacks
    def test_populate_spidercls_settings(self):
        # A spider's custom_settings ('spider' priority) must win over
        # same-named project settings, while unrelated project settings
        # remain visible on the resulting crawler.
        from_spider = {'TEST1': 'spider', 'TEST2': 'spider'}
        from_project = {'TEST1': 'project', 'TEST3': 'project'}

        class CustomSettingsSpider(DefaultSpider):
            custom_settings = from_spider

        self.crawler_runner.settings.setdict(from_project,
                                             priority='project')
        yield self.crawler_runner.crawl(CustomSettingsSpider)
        crawler = self.crawler_runner.crawlers.pop()
        for key, expected in (('TEST1', 'spider'),
                              ('TEST2', 'spider'),
                              ('TEST3', 'project')):
            self.assertEqual(crawler.settings.get(key), expected)
......@@ -10,6 +10,7 @@ except ImportError:
from scrapy import signals
from scrapy.spider import Spider, BaseSpider
from scrapy.settings import Settings
from scrapy.http import Request, Response, TextResponse, XmlResponse, HtmlResponse
from scrapy.contrib.spiders.init import InitSpider
from scrapy.contrib.spiders import CrawlSpider, Rule, XMLFeedSpider, \
......@@ -92,6 +93,16 @@ class SpiderTest(unittest.TestCase):
spider=spider, reason=None)
self.assertTrue(spider.closed_called)
def test_update_settings(self):
    # update_settings must merge class-level custom_settings at 'spider'
    # priority: same-named project settings are overridden, the rest
    # are left intact.
    per_spider = {'TEST1': 'spider', 'TEST2': 'spider'}
    per_project = {'TEST1': 'project', 'TEST3': 'project'}
    self.spider_class.custom_settings = per_spider
    settings = Settings(per_project, priority='project')
    self.spider_class.update_settings(settings)
    for key, expected in (('TEST1', 'spider'),
                          ('TEST2', 'spider'),
                          ('TEST3', 'project')):
        self.assertEqual(settings.get(key), expected)
class InitSpiderTest(SpiderTest):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册