Commit a69f042d authored by T tpeng

add 2 more test cases and minor doc fixes

Parent fa84730e
@@ -427,7 +427,7 @@ The amount of time (in secs) that the downloader will wait before timing out.
DOWNLOAD_MAXSIZE
----------------
-Default: `1073741824` (1024Mb)
+Default: `1073741824` (1024MB)
The maximum response size (in bytes) that downloader will download.
@@ -439,12 +439,14 @@ If you want to disable it set to 0.
spider attribute and per-request using :reqmeta:`download_maxsize`
Request.meta key.
+This feature needs Twisted >= 11.1.
.. setting:: DOWNLOAD_WARNSIZE
DOWNLOAD_WARNSIZE
-----------------
+-----------------
-Default: `33554432` (32Mb)
+Default: `33554432` (32MB)
The response size (in bytes) that downloader will start to warn.
@@ -456,6 +458,8 @@ If you want to disable it set to 0.
spider attribute and per-request using :reqmeta:`download_warnsize`
Request.meta key.
+This feature needs Twisted >= 11.1.
.. setting:: DUPEFILTER_CLASS
DUPEFILTER_CLASS
......
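For context, the documentation changed above notes that both limits can also be scoped per spider (via spider attributes) and per request (via the `download_maxsize` / `download_warnsize` keys in `Request.meta`). A minimal sketch of both styles, assuming a recent Scrapy; the spider name and URL are placeholders:

    import scrapy

    class SizeLimitedSpider(scrapy.Spider):
        name = 'size_limited'                 # hypothetical spider name
        download_maxsize = 10 * 1024 * 1024   # per-spider cap: abort responses larger than 10MB
        download_warnsize = 1 * 1024 * 1024   # per-spider threshold: warn above 1MB

        def start_requests(self):
            # the per-request meta key is meant to override the spider-level value
            # for this request only
            yield scrapy.Request('http://example.com/large-file',
                                 meta={'download_maxsize': 2 * 1024 * 1024})

        def parse(self, response):
            self.logger.info('downloaded %d bytes', len(response.body))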
@@ -66,8 +66,8 @@ DOWNLOAD_HANDLERS_BASE = {
DOWNLOAD_TIMEOUT = 180 # 3mins
-DOWNLOAD_MAXSIZE = 1073741824 # 1024m
-DOWNLOAD_WARNSIZE = 33554432 # 32m
+DOWNLOAD_MAXSIZE = 1024*1024*1024 # 1024m
+DOWNLOAD_WARNSIZE = 32*1024*1024 # 32m
DOWNLOADER = 'scrapy.core.downloader.Downloader'
......
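The rewritten defaults are numerically identical to the old hard-coded literals; the multiplication just makes the 1024MB and 32MB defaults readable at a glance:

    # sanity check: the new expressions equal the previous literal values
    assert 1024 * 1024 * 1024 == 1073741824   # DOWNLOAD_MAXSIZE (1024MB)
    assert 32 * 1024 * 1024 == 33554432       # DOWNLOAD_WARNSIZE (32MB)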
from __future__ import print_function
import sys, time, random, urllib, os, json
from subprocess import Popen, PIPE
-from twisted.web.server import Site, NOT_DONE_YET
-from twisted.web.resource import Resource
+from twisted.web.server import Site, NOT_DONE_YET, GzipEncoderFactory
+from twisted.web.resource import Resource, EncodingResourceWrapper
from twisted.internet import reactor, defer, ssl
from twisted.web.test.test_webclient import PayloadResource
from scrapy import twisted_version
@@ -167,6 +168,8 @@ class Root(Resource):
self.putChild("drop", Drop())
self.putChild("raw", Raw())
self.putChild("echo", Echo())
self.putChild('payload', PayloadResource())
self.putChild("xpayload", EncodingResourceWrapper(PayloadResource(), [GzipEncoderFactory()]))
def getChild(self, name, request):
return self
......
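The new `xpayload` child wraps the same `PayloadResource` in Twisted's `EncodingResourceWrapper` with a `GzipEncoderFactory`, so the mock server can serve the identical payload gzip-compressed whenever the client advertises `Accept-Encoding: gzip`. A standalone sketch of the same wiring, assuming Python 3 and a reasonably recent Twisted, with a toy resource standing in for Twisted's `PayloadResource` (port 8998 mirrors the mock server used by the tests):

    from twisted.internet import reactor
    from twisted.web.resource import Resource, EncodingResourceWrapper
    from twisted.web.server import Site, GzipEncoderFactory

    class EchoPayload(Resource):
        """Toy stand-in for twisted.web.test.test_webclient.PayloadResource."""
        isLeaf = True

        def render_POST(self, request):
            return request.content.read()   # echo the posted body back

    root = Resource()
    root.putChild(b'payload', EchoPayload())
    # same resource, but responses are gzip-encoded when the client accepts it
    root.putChild(b'xpayload', EncodingResourceWrapper(EchoPayload(), [GzipEncoderFactory()]))

    if __name__ == '__main__':
        reactor.listenTCP(8998, Site(root))
        reactor.run()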
@@ -220,6 +220,20 @@ class Http11TestCase(HttpTestCase):
        d.addCallback(self.assertEquals, "0123456789")
        return d

+    @defer.inlineCallbacks
+    def test_download_with_maxsize(self):
+        request = Request(self.getURL('file'))
+
+        # 10 is minimal size for this request and the limit is only counted on
+        # response body. (regardless of headers)
+        d = self.download_request(request, Spider('foo', download_maxsize=10))
+        d.addCallback(lambda r: r.body)
+        d.addCallback(self.assertEquals, "0123456789")
+        yield d
+
+        d = self.download_request(request, Spider('foo', download_maxsize=9))
+        yield self.assertFailure(d, defer.CancelledError, error.ConnectionAborted)
+
    @defer.inlineCallbacks
    def test_download_with_maxsize_per_req(self):
        meta = {'download_maxsize': 2}
@@ -271,6 +285,26 @@ class Http11MockServerTestCase(unittest.TestCase):
        reason = crawler.spider.meta['close_reason']
        self.assertTrue(reason, 'finished')

+    @defer.inlineCallbacks
+    def test_download_gzip_response(self):
+        crawler = get_crawler(SingleRequestSpider)
+        body = '1'*100 # PayloadResource requires body length to be 100
+        request = Request('http://localhost:8998/payload', method='POST', body=body, meta={'download_maxsize': 50})
+        yield crawler.crawl(seed=request)
+        failure = crawler.spider.meta['failure']
+        # download_maxsize < 100, hence the CancelledError
+        self.assertIsInstance(failure.value, defer.CancelledError)
+
+        request.headers.setdefault('Accept-Encoding', 'gzip,deflate')
+        request = request.replace(url='http://localhost:8998/xpayload')
+        yield crawler.crawl(seed=request)
+
+        # download_maxsize = 50 is enough for the gzipped response
+        failure = crawler.spider.meta.get('failure')
+        self.assertTrue(failure == None)
+        reason = crawler.spider.meta['close_reason']
+        self.assertTrue(reason, 'finished')

class UriResource(resource.Resource):
    """Return the full uri that was requested"""
......
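The gzip test's comments spell out why a 50-byte `download_maxsize` fails for the plain `payload` endpoint but passes for `xpayload`: the limit is applied to the bytes actually transferred, and 100 repeated '1' characters compress to well under 50 bytes. A quick way to check the numbers with the standard library (the exact compressed size may vary slightly across zlib versions):

    import gzip

    body = b'1' * 100                  # same payload the test POSTs
    compressed = gzip.compress(body)   # roughly what GzipEncoderFactory puts on the wire
    print(len(body), len(compressed))  # 100 vs. roughly 20-30 bytes, well under the 50-byte cap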