Unverified commit 9f8c3938 authored by Andrey Rahmatullin, committed by GitHub

Merge pull request #4823 from elacuesta/cookies-revert-header

Do not process cookies from headers
......@@ -207,6 +207,11 @@ CookiesMiddleware
a warning. Refer to :ref:`topics-logging-advanced-customization`
to customize the logging behaviour.
.. caution:: Cookies set via the ``Cookie`` header are not considered by the
:ref:`cookies-mw`. If you need to set cookies for a request, use the
:class:`Request.cookies <scrapy.http.Request>` parameter. This is a known
current limitation that is being worked on.
The following settings can be used to configure the cookie middleware:
* :setting:`COOKIES_ENABLED`
......
......@@ -61,6 +61,12 @@ Request objects
:param headers: the headers of this request. The dict values can be strings
(for single valued headers) or lists (for multi-valued headers). If
``None`` is passed as value, the HTTP header will not be sent at all.
.. caution:: Cookies set via the ``Cookie`` header are not considered by the
:ref:`cookies-mw`. If you need to set cookies for a request, use the
:class:`Request.cookies <scrapy.http.Request>` parameter. This is a known
current limitation that is being worked on.
:type headers: dict
:param cookies: the request cookies. These can be sent in two forms.
......@@ -102,6 +108,12 @@ Request objects
)
For more info see :ref:`cookies-mw`.
.. caution:: Cookies set via the ``Cookie`` header are not considered by the
:ref:`cookies-mw`. If you need to set cookies for a request, use the
:class:`Request.cookies <scrapy.http.Request>` parameter. This is a known
current limitation that is being worked on.
:type cookies: dict or list
:param encoding: the encoding of this request (defaults to ``'utf-8'``).
......
......@@ -352,6 +352,11 @@ Default::
The default headers used for Scrapy HTTP Requests. They're populated in the
:class:`~scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware`.
.. caution:: Cookies set via the ``Cookie`` header are not considered by the
:ref:`cookies-mw`. If you need to set cookies for a request, use the
:class:`Request.cookies <scrapy.http.Request>` parameter. This is a known
current limitation that is being worked on.
.. setting:: DEPTH_LIMIT
DEPTH_LIMIT
......
......@@ -97,35 +97,14 @@ class CookiesMiddleware:
def _get_request_cookies(self, jar, request):
    """
    Extract cookies from the ``Request.cookies`` attribute.

    Cookies set via the ``Cookie`` request header are deliberately NOT
    processed here (see PR #4823, "Do not process cookies from headers");
    only the ``cookies`` argument of the Request is considered.

    :param jar: the cookie jar for this request's cookiejar key; must
        provide ``make_cookies(response, request)``
        (``http.cookiejar.CookieJar`` interface).
    :param request: the :class:`~scrapy.http.Request` whose ``cookies``
        attribute (a dict, or a list of dicts) is read.
    :return: a list of ``http.cookiejar.Cookie`` objects (empty if the
        request carries no cookies).
    """
    if not request.cookies:
        # Nothing to do for requests without explicit cookies.
        return []
    elif isinstance(request.cookies, dict):
        # Normalize the dict form {name: value} into the list-of-dicts
        # form expected by _format_cookie.
        cookies = ({"name": k, "value": v} for k, v in request.cookies.items())
    else:
        # Already a list of cookie dicts.
        cookies = request.cookies
    # _format_cookie may return None for invalid cookies; drop those.
    formatted = filter(None, (self._format_cookie(c, request) for c in cookies))
    # Reuse the cookiejar's Set-Cookie parsing by wrapping the formatted
    # cookie strings in a synthetic Response.
    response = Response(request.url, headers={"Set-Cookie": formatted})
    return jar.make_cookies(response, request)
......@@ -2,6 +2,8 @@ import logging
from testfixtures import LogCapture
from unittest import TestCase
import pytest
from scrapy.downloadermiddlewares.cookies import CookiesMiddleware
from scrapy.downloadermiddlewares.defaultheaders import DefaultHeadersMiddleware
from scrapy.exceptions import NotConfigured
......@@ -243,6 +245,7 @@ class CookiesMiddlewareTest(TestCase):
self.assertIn('Cookie', request.headers)
self.assertEqual(b'currencyCookie=USD', request.headers['Cookie'])
@pytest.mark.xfail(reason="Cookie header is not currently being processed")
def test_keep_cookie_from_default_request_headers_middleware(self):
DEFAULT_REQUEST_HEADERS = dict(Cookie='default=value; asdf=qwerty')
mw_default_headers = DefaultHeadersMiddleware(DEFAULT_REQUEST_HEADERS.items())
......@@ -257,6 +260,7 @@ class CookiesMiddlewareTest(TestCase):
assert self.mw.process_request(req2, self.spider) is None
self.assertCookieValEqual(req2.headers['Cookie'], b'default=value; a=b; asdf=qwerty')
@pytest.mark.xfail(reason="Cookie header is not currently being processed")
def test_keep_cookie_header(self):
# keep only cookies from 'Cookie' request header
req1 = Request('http://scrapytest.org', headers={'Cookie': 'a=b; c=d'})
......@@ -291,6 +295,7 @@ class CookiesMiddlewareTest(TestCase):
assert self.mw.process_request(req3, self.spider) is None
self.assertCookieValEqual(req3.headers['Cookie'], b'a=\xc3\xa1')
@pytest.mark.xfail(reason="Cookie header is not currently being processed")
def test_request_headers_cookie_encoding(self):
# 1) UTF8-encoded bytes
req1 = Request('http://example.org', headers={'Cookie': 'a=á'.encode('utf8')})
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register to post a comment