提交 a69f042d 编写于 作者: T tpeng

add 2 more test cases and minor doc fixes

上级 fa84730e
...@@ -427,7 +427,7 @@ The amount of time (in secs) that the downloader will wait before timing out. ...@@ -427,7 +427,7 @@ The amount of time (in secs) that the downloader will wait before timing out.
DOWNLOAD_MAXSIZE
----------------

Default: `1073741824` (1024MB)

The maximum response size (in bytes) that downloader will download.
...@@ -439,12 +439,14 @@ If you want to disable it set to 0. ...@@ -439,12 +439,14 @@ If you want to disable it set to 0.
spider attribute and per-request using :reqmeta:`download_maxsize`
Request.meta key.
This feature needs Twisted >= 11.1.
.. setting:: DOWNLOAD_WARNSIZE

DOWNLOAD_WARNSIZE
-----------------

Default: `33554432` (32MB)

The response size (in bytes) that downloader will start to warn.
...@@ -456,6 +458,8 @@ If you want to disable it set to 0. ...@@ -456,6 +458,8 @@ If you want to disable it set to 0.
spider attribute and per-request using :reqmeta:`download_warnsize`
Request.meta key.
This feature needs Twisted >= 11.1.
.. setting:: DUPEFILTER_CLASS .. setting:: DUPEFILTER_CLASS
DUPEFILTER_CLASS DUPEFILTER_CLASS
......
...@@ -66,8 +66,8 @@ DOWNLOAD_HANDLERS_BASE = { ...@@ -66,8 +66,8 @@ DOWNLOAD_HANDLERS_BASE = {
DOWNLOAD_TIMEOUT = 180 # 3mins DOWNLOAD_TIMEOUT = 180 # 3mins
DOWNLOAD_MAXSIZE = 1073741824 # 1024m DOWNLOAD_MAXSIZE = 1024*1024*1024 # 1024m
DOWNLOAD_WARNSIZE = 33554432 # 32m DOWNLOAD_WARNSIZE = 32*1024*1024 # 32m
DOWNLOADER = 'scrapy.core.downloader.Downloader' DOWNLOADER = 'scrapy.core.downloader.Downloader'
......
from __future__ import print_function from __future__ import print_function
import sys, time, random, urllib, os, json import sys, time, random, urllib, os, json
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
from twisted.web.server import Site, NOT_DONE_YET from twisted.web.server import Site, NOT_DONE_YET, GzipEncoderFactory
from twisted.web.resource import Resource from twisted.web.resource import Resource, EncodingResourceWrapper
from twisted.internet import reactor, defer, ssl from twisted.internet import reactor, defer, ssl
from twisted.web.test.test_webclient import PayloadResource
from scrapy import twisted_version from scrapy import twisted_version
...@@ -167,6 +168,8 @@ class Root(Resource): ...@@ -167,6 +168,8 @@ class Root(Resource):
self.putChild("drop", Drop()) self.putChild("drop", Drop())
self.putChild("raw", Raw()) self.putChild("raw", Raw())
self.putChild("echo", Echo()) self.putChild("echo", Echo())
self.putChild('payload', PayloadResource())
self.putChild("xpayload", EncodingResourceWrapper(PayloadResource(), [GzipEncoderFactory()]))
def getChild(self, name, request): def getChild(self, name, request):
return self return self
......
...@@ -220,6 +220,20 @@ class Http11TestCase(HttpTestCase): ...@@ -220,6 +220,20 @@ class Http11TestCase(HttpTestCase):
d.addCallback(self.assertEquals, "0123456789") d.addCallback(self.assertEquals, "0123456789")
return d return d
@defer.inlineCallbacks
def test_download_with_maxsize(self):
    """A 10-byte fixture downloads intact when download_maxsize equals
    the body size, but the request is aborted once the limit drops below
    it. The limit is applied to the response body only, not the headers.
    """
    request = Request(self.getURL('file'))

    # Limit exactly matches the body length: download must succeed.
    dfd = self.download_request(request, Spider('foo', download_maxsize=10))
    dfd.addCallback(lambda response: response.body)
    dfd.addCallback(self.assertEquals, "0123456789")
    yield dfd

    # One byte under the body length: download must be aborted.
    dfd = self.download_request(request, Spider('foo', download_maxsize=9))
    yield self.assertFailure(dfd, defer.CancelledError, error.ConnectionAborted)
@defer.inlineCallbacks @defer.inlineCallbacks
def test_download_with_maxsize_per_req(self): def test_download_with_maxsize_per_req(self):
meta = {'download_maxsize': 2} meta = {'download_maxsize': 2}
...@@ -271,6 +285,26 @@ class Http11MockServerTestCase(unittest.TestCase): ...@@ -271,6 +285,26 @@ class Http11MockServerTestCase(unittest.TestCase):
reason = crawler.spider.meta['close_reason'] reason = crawler.spider.meta['close_reason']
self.assertTrue(reason, 'finished') self.assertTrue(reason, 'finished')
@defer.inlineCallbacks
def test_download_gzip_response(self):
    """download_maxsize is enforced against the received (encoded) size:
    a 100-byte payload exceeds a 50-byte limit when sent raw, but fits
    once the server gzip-compresses it.
    """
    crawler = get_crawler(SingleRequestSpider)
    body = '1' * 100  # PayloadResource requires body length to be 100
    request = Request('http://localhost:8998/payload', method='POST',
                      body=body, meta={'download_maxsize': 50})
    yield crawler.crawl(seed=request)
    failure = crawler.spider.meta['failure']
    # download_maxsize < 100, hence the CancelledError
    self.assertIsInstance(failure.value, defer.CancelledError)

    request.headers.setdefault('Accept-Encoding', 'gzip,deflate')
    request = request.replace(url='http://localhost:8998/xpayload')
    yield crawler.crawl(seed=request)
    # download_maxsize = 50 is enough for the gzipped response
    failure = crawler.spider.meta.get('failure')
    self.assertIsNone(failure)
    reason = crawler.spider.meta['close_reason']
    # Fixed: assertTrue(reason, 'finished') treated 'finished' as a failure
    # message and passed for ANY truthy reason; assertEqual actually checks.
    self.assertEqual(reason, 'finished')
class UriResource(resource.Resource): class UriResource(resource.Resource):
"""Return the full uri that was requested""" """Return the full uri that was requested"""
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册