提交 a69f042d 编写于 作者: T tpeng

add 2 more test cases and minor doc fixes

上级 fa84730e
...@@ -427,7 +427,7 @@ The amount of time (in secs) that the downloader will wait before timing out. ...@@ -427,7 +427,7 @@ The amount of time (in secs) that the downloader will wait before timing out.
DOWNLOAD_MAXSIZE
----------------

Default: `1073741824` (1024MB)

The maximum response size (in bytes) that downloader will download.
...@@ -439,12 +439,14 @@ If you want to disable it set to 0. ...@@ -439,12 +439,14 @@ If you want to disable it set to 0.
spider attribute and per-request using :reqmeta:`download_maxsize`
Request.meta key.
This feature needs Twisted >= 11.1.
.. setting:: DOWNLOAD_WARNSIZE

DOWNLOAD_WARNSIZE
-----------------

Default: `33554432` (32MB)

The response size (in bytes) that downloader will start to warn.
...@@ -456,6 +458,8 @@ If you want to disable it set to 0. ...@@ -456,6 +458,8 @@ If you want to disable it set to 0.
spider attribute and per-request using :reqmeta:`download_warnsize`
Request.meta key.
This feature needs Twisted >= 11.1.
.. setting:: DUPEFILTER_CLASS .. setting:: DUPEFILTER_CLASS
DUPEFILTER_CLASS DUPEFILTER_CLASS
......
...@@ -66,8 +66,8 @@ DOWNLOAD_HANDLERS_BASE = { ...@@ -66,8 +66,8 @@ DOWNLOAD_HANDLERS_BASE = {
DOWNLOAD_TIMEOUT = 180 # 3mins DOWNLOAD_TIMEOUT = 180 # 3mins
DOWNLOAD_MAXSIZE = 1073741824 # 1024m DOWNLOAD_MAXSIZE = 1024*1024*1024 # 1024m
DOWNLOAD_WARNSIZE = 33554432 # 32m DOWNLOAD_WARNSIZE = 32*1024*1024 # 32m
DOWNLOADER = 'scrapy.core.downloader.Downloader' DOWNLOADER = 'scrapy.core.downloader.Downloader'
......
from __future__ import print_function from __future__ import print_function
import sys, time, random, urllib, os, json import sys, time, random, urllib, os, json
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
from twisted.web.server import Site, NOT_DONE_YET from twisted.web.server import Site, NOT_DONE_YET, GzipEncoderFactory
from twisted.web.resource import Resource from twisted.web.resource import Resource, EncodingResourceWrapper
from twisted.internet import reactor, defer, ssl from twisted.internet import reactor, defer, ssl
from twisted.web.test.test_webclient import PayloadResource
from scrapy import twisted_version from scrapy import twisted_version
...@@ -167,6 +168,8 @@ class Root(Resource): ...@@ -167,6 +168,8 @@ class Root(Resource):
self.putChild("drop", Drop()) self.putChild("drop", Drop())
self.putChild("raw", Raw()) self.putChild("raw", Raw())
self.putChild("echo", Echo()) self.putChild("echo", Echo())
self.putChild('payload', PayloadResource())
self.putChild("xpayload", EncodingResourceWrapper(PayloadResource(), [GzipEncoderFactory()]))
def getChild(self, name, request): def getChild(self, name, request):
return self return self
......
...@@ -220,6 +220,20 @@ class Http11TestCase(HttpTestCase): ...@@ -220,6 +220,20 @@ class Http11TestCase(HttpTestCase):
d.addCallback(self.assertEquals, "0123456789") d.addCallback(self.assertEquals, "0123456789")
return d return d
@defer.inlineCallbacks
def test_download_with_maxsize(self):
    """A 10-byte fixture downloads intact when download_maxsize equals
    the body size, but the request is aborted once the limit drops below
    it. The limit is applied to the response body only, not the headers.
    """
    request = Request(self.getURL('file'))

    # Limit exactly matches the body length: download must succeed.
    dfd = self.download_request(request, Spider('foo', download_maxsize=10))
    dfd.addCallback(lambda response: response.body)
    dfd.addCallback(self.assertEquals, "0123456789")
    yield dfd

    # One byte under the body length: download must be aborted.
    dfd = self.download_request(request, Spider('foo', download_maxsize=9))
    yield self.assertFailure(dfd, defer.CancelledError, error.ConnectionAborted)
@defer.inlineCallbacks @defer.inlineCallbacks
def test_download_with_maxsize_per_req(self): def test_download_with_maxsize_per_req(self):
meta = {'download_maxsize': 2} meta = {'download_maxsize': 2}
...@@ -271,6 +285,26 @@ class Http11MockServerTestCase(unittest.TestCase): ...@@ -271,6 +285,26 @@ class Http11MockServerTestCase(unittest.TestCase):
reason = crawler.spider.meta['close_reason'] reason = crawler.spider.meta['close_reason']
self.assertTrue(reason, 'finished') self.assertTrue(reason, 'finished')
@defer.inlineCallbacks
def test_download_gzip_response(self):
    """download_maxsize is enforced against the received (encoded) size:
    a 100-byte payload exceeds a 50-byte limit when sent raw, but fits
    once the server gzip-compresses it.
    """
    crawler = get_crawler(SingleRequestSpider)
    body = '1' * 100  # PayloadResource requires body length to be 100
    request = Request('http://localhost:8998/payload', method='POST',
                      body=body, meta={'download_maxsize': 50})
    yield crawler.crawl(seed=request)
    failure = crawler.spider.meta['failure']
    # download_maxsize < 100, hence the CancelledError
    self.assertIsInstance(failure.value, defer.CancelledError)

    request.headers.setdefault('Accept-Encoding', 'gzip,deflate')
    request = request.replace(url='http://localhost:8998/xpayload')
    yield crawler.crawl(seed=request)
    # download_maxsize = 50 is enough for the gzipped response
    failure = crawler.spider.meta.get('failure')
    self.assertIsNone(failure)
    reason = crawler.spider.meta['close_reason']
    # Fixed: assertTrue(reason, 'finished') treated 'finished' as a failure
    # message and passed for ANY truthy reason; assertEqual actually checks.
    self.assertEqual(reason, 'finished')
class UriResource(resource.Resource): class UriResource(resource.Resource):
"""Return the full uri that was requested""" """Return the full uri that was requested"""
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册