提交 d684ecad 编写于 作者: D Daniel Graña

Merge pull request #846 from rocioar/master

fix dont_merge_cookies bad behaviour when set to false on meta
......@@ -634,8 +634,8 @@ settings (see the settings documentation for more info):
.. reqmeta:: dont_redirect
-If :attr:`Request.meta <scrapy.http.Request.meta>` contains the
-``dont_redirect`` key, the request will be ignored by this middleware.
+If :attr:`Request.meta <scrapy.http.Request.meta>` has ``dont_redirect``
+key set to True, the request will be ignored by this middleware.
RedirectMiddleware settings
......@@ -732,8 +732,8 @@ to indicate server overload, which would be something we want to retry.
.. reqmeta:: dont_retry
-If :attr:`Request.meta <scrapy.http.Request.meta>` contains the ``dont_retry``
-key, the request will be ignored by this middleware.
+If :attr:`Request.meta <scrapy.http.Request.meta>` has ``dont_retry`` key
+set to True, the request will be ignored by this middleware.
RetryMiddleware Settings
~~~~~~~~~~~~~~~~~~~~~~~~
......
......@@ -83,7 +83,7 @@ Request objects
cookies for that domain and will be sent again in future requests. That's
the typical behaviour of any regular web browser. However, if, for some
reason, you want to avoid merging with existing cookies you can instruct
-Scrapy to do so by setting the ``dont_merge_cookies`` key in the
+Scrapy to do so by setting the ``dont_merge_cookies`` key to True in the
:attr:`Request.meta`.
Example of request without merging cookies::
......@@ -102,7 +102,7 @@ Request objects
:param priority: the priority of this request (defaults to ``0``).
The priority is used by the scheduler to define the order used to process
requests. Requests with a higher priority value will execute earlier.
Negative values are allowed in order to indicate relatively low-priority.
:type priority: int
......
......@@ -22,7 +22,7 @@ class CookiesMiddleware(object):
return cls(crawler.settings.getbool('COOKIES_DEBUG'))
def process_request(self, request, spider):
if 'dont_merge_cookies' in request.meta:
if request.meta.get('dont_merge_cookies', False):
return
cookiejarkey = request.meta.get("cookiejar")
......@@ -37,7 +37,7 @@ class CookiesMiddleware(object):
self._debug_cookie(request, spider)
def process_response(self, request, response, spider):
if 'dont_merge_cookies' in request.meta:
if request.meta.get('dont_merge_cookies', False):
return response
# extract cookies from Set-Cookie and drop invalid/expired cookies
......
......@@ -52,7 +52,7 @@ class RedirectMiddleware(BaseRedirectMiddleware):
"""Handle redirection of requests based on response status and meta-refresh html tag"""
def process_response(self, request, response, spider):
if 'dont_redirect' in request.meta:
if request.meta.get('dont_redirect', False):
return response
if request.method == 'HEAD':
......@@ -86,7 +86,7 @@ class MetaRefreshMiddleware(BaseRedirectMiddleware):
settings.getint('METAREFRESH_MAXDELAY'))
def process_response(self, request, response, spider):
if 'dont_redirect' in request.meta or request.method == 'HEAD' or \
if request.meta.get('dont_redirect', False) or request.method == 'HEAD' or \
not isinstance(response, HtmlResponse):
return response
......
......@@ -50,7 +50,7 @@ class RetryMiddleware(object):
return cls(crawler.settings)
def process_response(self, request, response, spider):
if 'dont_retry' in request.meta:
if request.meta.get('dont_retry', False):
return response
if response.status in self.retry_http_codes:
reason = response_status_message(response.status)
......@@ -59,8 +59,8 @@ class RetryMiddleware(object):
def process_exception(self, request, exception, spider):
    """Retry a request that failed with a network-level exception.

    A retry is scheduled only when both conditions hold:

    * ``exception`` is one of the retryable exception types listed in
      ``self.EXCEPTIONS_TO_RETRY``;
    * the request's ``dont_retry`` meta key is not set to a truthy value.

    The scrape fused the pre- and post-change lines of this diff; this is
    the post-change form: testing the key's *value* with
    ``meta.get('dont_retry', False)`` (rather than its mere presence with
    ``'dont_retry' in request.meta``) lets callers pass
    ``meta={'dont_retry': False}`` and still get retries, which is the
    bug this commit fixes.

    :param request: the failed request (its ``meta`` dict is consulted).
    :param exception: the exception raised while downloading ``request``.
    :param spider: the spider that issued the request.
    :returns: whatever ``self._retry`` returns (a retry request, or
        ``None`` once retries are exhausted), or ``None`` when no retry
        should be attempted.
    """
    if isinstance(exception, self.EXCEPTIONS_TO_RETRY) \
            and not request.meta.get('dont_retry', False):
        return self._retry(request, exception, spider)
def _retry(self, request, reason, spider):
retries = request.meta.get('retry_times', 0) + 1
......
......@@ -52,10 +52,16 @@ class CookiesMiddlewareTest(TestCase):
res = Response('http://scrapytest.org/dontmerge', headers={'Set-Cookie': 'dont=mergeme; path=/'})
assert self.mw.process_response(req, res, self.spider) is res
# check that cookies are merged back
req = Request('http://scrapytest.org/mergeme')
assert self.mw.process_request(req, self.spider) is None
self.assertEquals(req.headers.get('Cookie'), 'C1=value1')
# check that cookies are merged when dont_merge_cookies is passed as 0
req = Request('http://scrapytest.org/mergeme', meta={'dont_merge_cookies': 0})
assert self.mw.process_request(req, self.spider) is None
self.assertEquals(req.headers.get('Cookie'), 'C1=value1')
def test_complex_cookies(self):
# merge some cookies into jar
cookies = [{'name': 'C1', 'value': 'value1', 'path': '/foo', 'domain': 'scrapytest.org'},
......
......@@ -50,6 +50,15 @@ class RedirectMiddlewareTest(unittest.TestCase):
assert isinstance(r, Response)
assert r is rsp
# Test that it redirects when dont_redirect is False
req = Request(url, meta={'dont_redirect': False})
rsp = Response(url2, status=200)
r = self.mw.process_response(req, rsp, self.spider)
assert isinstance(r, Response)
assert r is rsp
def test_redirect_302(self):
url = 'http://www.example.com/302'
url2 = 'http://www.example.com/redirected2'
......
......@@ -40,6 +40,14 @@ class RetryTest(unittest.TestCase):
r = self.mw.process_response(req, rsp, self.spider)
assert r is rsp
# Test retry when dont_retry set to False
req = Request('http://www.scrapytest.org/503', meta={'dont_retry': False})
rsp = Response('http://www.scrapytest.org/503')
# first retry
r = self.mw.process_response(req, rsp, self.spider)
assert r is rsp
def test_dont_retry_exc(self):
req = Request('http://www.scrapytest.org/503', meta={'dont_retry': True})
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册