提交 d684ecad 编写于 作者: D Daniel Graña

Merge pull request #846 from rocioar/master

fix dont_merge_cookies bad behaviour when set to false on meta
......@@ -634,8 +634,8 @@ settings (see the settings documentation for more info):
.. reqmeta:: dont_redirect
-If :attr:`Request.meta <scrapy.http.Request.meta>` contains the
-``dont_redirect`` key, the request will be ignored by this middleware.
+If :attr:`Request.meta <scrapy.http.Request.meta>` has ``dont_redirect``
+key set to True, the request will be ignored by this middleware.
RedirectMiddleware settings
......@@ -732,8 +732,8 @@ to indicate server overload, which would be something we want to retry.
.. reqmeta:: dont_retry
-If :attr:`Request.meta <scrapy.http.Request.meta>` contains the ``dont_retry``
-key, the request will be ignored by this middleware.
+If :attr:`Request.meta <scrapy.http.Request.meta>` has ``dont_retry`` key
+set to True, the request will be ignored by this middleware.
RetryMiddleware Settings
~~~~~~~~~~~~~~~~~~~~~~~~
......
......@@ -83,7 +83,7 @@ Request objects
cookies for that domain and will be sent again in future requests. That's
the typical behaviour of any regular web browser. However, if, for some
reason, you want to avoid merging with existing cookies you can instruct
-Scrapy to do so by setting the ``dont_merge_cookies`` key in the
+Scrapy to do so by setting the ``dont_merge_cookies`` key to True in the
:attr:`Request.meta`.
Example of request without merging cookies::
......@@ -102,7 +102,7 @@ Request objects
:param priority: the priority of this request (defaults to ``0``).
The priority is used by the scheduler to define the order used to process
requests. Requests with a higher priority value will execute earlier.
Negative values are allowed in order to indicate relatively low-priority.
:type priority: int
......
......@@ -22,7 +22,7 @@ class CookiesMiddleware(object):
return cls(crawler.settings.getbool('COOKIES_DEBUG'))
def process_request(self, request, spider):
if 'dont_merge_cookies' in request.meta:
if request.meta.get('dont_merge_cookies', False):
return
cookiejarkey = request.meta.get("cookiejar")
......@@ -37,7 +37,7 @@ class CookiesMiddleware(object):
self._debug_cookie(request, spider)
def process_response(self, request, response, spider):
if 'dont_merge_cookies' in request.meta:
if request.meta.get('dont_merge_cookies', False):
return response
# extract cookies from Set-Cookie and drop invalid/expired cookies
......
......@@ -52,7 +52,7 @@ class RedirectMiddleware(BaseRedirectMiddleware):
"""Handle redirection of requests based on response status and meta-refresh html tag"""
def process_response(self, request, response, spider):
if 'dont_redirect' in request.meta:
if request.meta.get('dont_redirect', False):
return response
if request.method == 'HEAD':
......@@ -86,7 +86,7 @@ class MetaRefreshMiddleware(BaseRedirectMiddleware):
settings.getint('METAREFRESH_MAXDELAY'))
def process_response(self, request, response, spider):
if 'dont_redirect' in request.meta or request.method == 'HEAD' or \
if request.meta.get('dont_redirect', False) or request.method == 'HEAD' or \
not isinstance(response, HtmlResponse):
return response
......
......@@ -50,7 +50,7 @@ class RetryMiddleware(object):
return cls(crawler.settings)
def process_response(self, request, response, spider):
if 'dont_retry' in request.meta:
if request.meta.get('dont_retry', False):
return response
if response.status in self.retry_http_codes:
reason = response_status_message(response.status)
......@@ -59,8 +59,8 @@ class RetryMiddleware(object):
def process_exception(self, request, exception, spider):
    """Retry a request that failed with a network-level exception.

    A retry is scheduled only when both conditions hold:

    * ``exception`` is one of the retryable exception types listed in
      ``self.EXCEPTIONS_TO_RETRY``;
    * the request's ``dont_retry`` meta key is not set to a truthy value.

    The scrape fused the pre- and post-change lines of this diff; this is
    the post-change form: testing the key's *value* with
    ``meta.get('dont_retry', False)`` (rather than its mere presence with
    ``'dont_retry' in request.meta``) lets callers pass
    ``meta={'dont_retry': False}`` and still get retries, which is the
    bug this commit fixes.

    :param request: the failed request (its ``meta`` dict is consulted).
    :param exception: the exception raised while downloading ``request``.
    :param spider: the spider that issued the request.
    :returns: whatever ``self._retry`` returns (a retry request, or
        ``None`` once retries are exhausted), or ``None`` when no retry
        should be attempted.
    """
    if isinstance(exception, self.EXCEPTIONS_TO_RETRY) \
            and not request.meta.get('dont_retry', False):
        return self._retry(request, exception, spider)
def _retry(self, request, reason, spider):
retries = request.meta.get('retry_times', 0) + 1
......
......@@ -52,10 +52,16 @@ class CookiesMiddlewareTest(TestCase):
res = Response('http://scrapytest.org/dontmerge', headers={'Set-Cookie': 'dont=mergeme; path=/'})
assert self.mw.process_response(req, res, self.spider) is res
# check that cookies are merged back
req = Request('http://scrapytest.org/mergeme')
assert self.mw.process_request(req, self.spider) is None
self.assertEquals(req.headers.get('Cookie'), 'C1=value1')
# check that cookies are merged when dont_merge_cookies is passed as 0
req = Request('http://scrapytest.org/mergeme', meta={'dont_merge_cookies': 0})
assert self.mw.process_request(req, self.spider) is None
self.assertEquals(req.headers.get('Cookie'), 'C1=value1')
def test_complex_cookies(self):
# merge some cookies into jar
cookies = [{'name': 'C1', 'value': 'value1', 'path': '/foo', 'domain': 'scrapytest.org'},
......
......@@ -50,6 +50,15 @@ class RedirectMiddlewareTest(unittest.TestCase):
assert isinstance(r, Response)
assert r is rsp
# Test that it redirects when dont_redirect is False
req = Request(url, meta={'dont_redirect': False})
rsp = Response(url2, status=200)
r = self.mw.process_response(req, rsp, self.spider)
assert isinstance(r, Response)
assert r is rsp
def test_redirect_302(self):
url = 'http://www.example.com/302'
url2 = 'http://www.example.com/redirected2'
......
......@@ -40,6 +40,14 @@ class RetryTest(unittest.TestCase):
r = self.mw.process_response(req, rsp, self.spider)
assert r is rsp
# Test retry when dont_retry set to False
req = Request('http://www.scrapytest.org/503', meta={'dont_retry': False})
rsp = Response('http://www.scrapytest.org/503')
# first retry
r = self.mw.process_response(req, rsp, self.spider)
assert r is rsp
def test_dont_retry_exc(self):
req = Request('http://www.scrapytest.org/503', meta={'dont_retry': True})
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册