提交 728e505e 编写于 作者: P Paul Tremberth

Merge remote-tracking branch 'origin/master' into release-notes-1.1

......@@ -8,7 +8,7 @@ Release notes
This 1.1 release brings a lot of interesting features and bug fixes:
- Scrapy 1.1 has basic Python 3 support (requires Twisted >= 15.5). See
:ref:`news_basicpy3` for more details and some limitations.
- Hot new features:
......@@ -18,7 +18,7 @@ This 1.1 release brings a lot of interesting features and bug fixes:
AutoThrottle docs (:issue:`1324`).
- Added ``response.text`` to get body as unicode (:issue:`1730`).
- Anonymous S3 connections (:issue:`1358`).
- Deferreds in downloader middlewares (:issue:`1473`). This enables better
robots.txt handling (:issue:`1471`).
- HTTP cache improvements (:issue:`1151`).
......@@ -70,7 +70,7 @@ Additional New Features and Enhancements
**Warning: backwards incompatible!**
- Bring back support for relative file path (:issue:`1710`, :issue:`1550`).
- Added :setting:`MEMUSAGE_CHECK_INTERVAL_SECONDS` setting to change default check
interval (:issue:`1282`).
- HTTP caching now follows RFC2616 more closely, added settings
:setting:`HTTPCACHE_ALWAYS_STORE` and
......@@ -93,7 +93,7 @@ Additional New Features and Enhancements
- robots.txt compliance now enabled by default for new projects (:issue:`1724`).
- ``CloseSpider`` and ``SpiderState`` extensions now get disabled if no relevant
setting is set (:issue:`1723`, :issue:`1725`).
- Added method ``ExecutionEngine.close`` (:issue:`1423`).
- Added method ``CrawlerRunner.create_crawler`` (:issue:`1528`).
- Tons of documentation updates and related fixes (:issue:`1291`, :issue:`1302`,
:issue:`1335`, :issue:`1683`, :issue:`1660`, :issue:`1642`, :issue:`1721`,
......@@ -145,7 +145,7 @@ Bugfixes
- Fixed :setting:`TEMPLATES_DIR` handling (:issue:`1575`).
- Various ``FormRequest`` fixes (:issue:`1595`, :issue:`1596`, :issue:`1597`).
- Makes ``_monkeypatches`` more robust (:issue:`1634`).
- Fixed bug on ``XMLItemExporter`` with non-string fields in
items (:issue:`1738`).
- Fixed startproject command in OS X (:issue:`1635`).
- Fixed PythonItemExporter and CSVExporter for non-string item
......@@ -155,6 +155,15 @@ Bugfixes
- Fixed bug in ``utils.template.render_templatefile()`` (:issue:`1212`).
1.0.5 (2016-02-04)
------------------
- FIX: [Backport] Ignore bogus links in LinkExtractors (fixes :issue:`907`, :commit:`108195e`)
- TST: Changed buildbot makefile to use 'pytest' (:commit:`1f3d90a`)
- DOC: Fixed typos in tutorial and media-pipeline (:commit:`808a9ea` and :commit:`803bd87`)
- DOC: Add AjaxCrawlMiddleware to DOWNLOADER_MIDDLEWARES_BASE in settings docs (:commit:`aa94121`)
1.0.4 (2015-12-30)
------------------
......
from __future__ import print_function
import json
from scrapy.commands import ScrapyCommand
from scrapy.settings import BaseSettings
class Command(ScrapyCommand):
......@@ -28,7 +31,11 @@ class Command(ScrapyCommand):
def run(self, args, opts):
settings = self.crawler_process.settings
if opts.get:
print(settings.get(opts.get))
s = settings.get(opts.get)
if isinstance(s, BaseSettings):
print(json.dumps(s.copy_to_dict()))
else:
print(s)
elif opts.getbool:
print(settings.getbool(opts.getbool))
elif opts.getint:
......
......@@ -101,4 +101,6 @@ class RobotsTxtMiddleware(object):
rp_dfd.callback(rp)
def _robots_error(self, failure, netloc):
self._parsers.pop(netloc).callback(None)
rp_dfd = self._parsers[netloc]
self._parsers[netloc] = None
rp_dfd.callback(None)
......@@ -4,6 +4,7 @@ import copy
import warnings
from collections import MutableMapping
from importlib import import_module
from pprint import pformat
from scrapy.utils.deprecate import create_deprecated_class
from scrapy.exceptions import ScrapyDeprecationWarning
......@@ -368,11 +369,31 @@ class BaseSettings(MutableMapping):
def __len__(self):
return len(self.attributes)
def __str__(self):
return str(self.attributes)
def _to_dict(self):
return {k: (v._to_dict() if isinstance(v, BaseSettings) else v)
for k, v in six.iteritems(self)}
def copy_to_dict(self):
"""
Make a copy of current settings and convert to a dict.
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__, self.attributes)
This method returns a new dict populated with the same values
and their priorities as the current settings.
Modifications to the returned dict won't be reflected on the original
settings.
This method can be useful for example for printing settings
in Scrapy shell.
"""
settings = self.copy()
return settings._to_dict()
def _repr_pretty_(self, p, cycle):
if cycle:
p.text(repr(self))
else:
p.text(pformat(self.copy_to_dict()))
@property
def overrides(self):
......
......@@ -68,4 +68,4 @@ class CmdlineTest(unittest.TestCase):
settingsstr = settingsstr.replace(char, '"')
settingsdict = json.loads(settingsstr)
six.assertCountEqual(self, settingsdict.keys(), EXTENSIONS.keys())
self.assertIn('value=200', settingsdict[EXT_PATH])
self.assertEquals(200, settingsdict[EXT_PATH])
......@@ -123,6 +123,18 @@ class RobotsTxtMiddlewareTest(unittest.TestCase):
deferred.addCallback(lambda _: self.assertTrue(middleware._logerror.called))
return deferred
def test_robotstxt_immediate_error(self):
    """The middleware must not drop a request when the robots.txt
    download fails synchronously (the deferred errbacks before any
    callback is attached, e.g. an instant DNS lookup failure)."""
    self.crawler.settings.set('ROBOTSTXT_OBEY', True)
    dns_failure = error.DNSLookupError('Robotstxt address not found')

    def fail_right_away(request, spider):
        # Return a deferred that has already fired with the error.
        d = Deferred()
        d.errback(failure.Failure(dns_failure))
        return d

    self.crawler.engine.download.side_effect = fail_right_away
    mw = RobotsTxtMiddleware(self.crawler)
    return self.assertNotIgnored(Request('http://site.local'), mw)
def test_ignore_robotstxt_request(self):
self.crawler.settings.set('ROBOTSTXT_OBEY', True)
def ignore_request(request, spider):
......
......@@ -302,6 +302,21 @@ class BaseSettingsTest(unittest.TestCase):
self.assertListEqual(copy.get('TEST_LIST_OF_LISTS')[0],
['first_one', 'first_two'])
def test_copy_to_dict(self):
    """copy_to_dict() must return a plain dict, recursively converting
    any nested BaseSettings values into plain dicts as well."""
    settings = BaseSettings({
        'TEST_STRING': 'a string',
        'TEST_LIST': [1, 2],
        'TEST_BOOLEAN': False,
        'TEST_BASE': BaseSettings({1: 1, 2: 2}, 'project'),
        'TEST': BaseSettings({1: 10, 3: 30}, 'default'),
        'HASNOBASE': BaseSettings({3: 3000}, 'default'),
    })
    expected = {
        'TEST_STRING': 'a string',
        'TEST_LIST': [1, 2],
        'TEST_BOOLEAN': False,
        'TEST_BASE': {1: 1, 2: 2},
        'TEST': {1: 10, 3: 30},
        'HASNOBASE': {3: 3000},
    }
    self.assertDictEqual(settings.copy_to_dict(), expected)
def test_freeze(self):
self.settings.freeze()
with self.assertRaises(TypeError) as cm:
......@@ -343,14 +358,6 @@ class BaseSettingsTest(unittest.TestCase):
self.assertEqual(self.settings.defaults.get('BAR'), 'foo')
self.assertIn('BAR', self.settings.defaults)
def test_repr(self):
    """repr() of a BaseSettings shows the class name and the stored
    attributes mapping, both when empty and when populated."""
    settings = BaseSettings()
    self.assertEqual(repr(settings), "<BaseSettings {}>")
    attr = SettingsAttribute('testval', 15)
    settings['testkey'] = attr
    expected = "<BaseSettings {'testkey': %s}>" % repr(attr)
    self.assertEqual(repr(settings), expected)
class SettingsTest(unittest.TestCase):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册