From 0c9631594d8e3d2e1917e2d46ec67afcf719d1bf Mon Sep 17 00:00:00 2001
From: Pablo Hoffman
Date: Tue, 24 Mar 2009 20:02:42 +0000
Subject: [PATCH] - made Request.copy() use Request.replace() - added
 callback, dont_filter, encoding to Request copy/replace - fixed
 Request.replace() and Response.replace() which weren't working properly
 with empty arguments - added new test cases - added doc about copying
 requests and deferreds

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%401012
---
 scrapy/trunk/docs/ref/request-response.rst    | 32 ++++++++++++--
 scrapy/trunk/scrapy/http/request/__init__.py  | 44 +++++++++----------
 scrapy/trunk/scrapy/http/response/__init__.py | 12 ++---
 .../trunk/scrapy/tests/test_http_request.py   | 13 ++++++
 .../trunk/scrapy/tests/test_http_response.py  |  7 +++
 5 files changed, 76 insertions(+), 32 deletions(-)

diff --git a/scrapy/trunk/docs/ref/request-response.rst b/scrapy/trunk/docs/ref/request-response.rst
index 3dacdd691..7fce0df4b 100644
--- a/scrapy/trunk/docs/ref/request-response.rst
+++ b/scrapy/trunk/docs/ref/request-response.rst
@@ -35,7 +35,7 @@ Request objects
 
    ``callback`` is a function that will be called with the response of this
    request (once its downloaded) as its first parameter. For more information
-   see :ref:`ref-request-callbacks` below.
+   see :ref:`ref-request-callback-arguments` below.
 
    ``method`` is a string with the HTTP method of this request
 
@@ -125,19 +125,43 @@ Request Methods
 .. method:: Request.copy()
 
    Return a new Request which is a copy of this Request. The attribute
-   :attr:`Request.meta` is copied, while :attr:`Request.cache` is not.
+   :attr:`Request.meta` is copied, while :attr:`Request.cache` is not. See also
+   :ref:`ref-request-callback-copy`.
 
 .. method:: Request.replace()
 
    Return a Request object with the same members, except for those members
    given new values by whichever keyword arguments are specified. The attribute
-   :attr:`Request.meta` is copied, while :attr:`Request.cache` is not.
+   :attr:`Request.meta` is copied, while :attr:`Request.cache` is not. See also
+   :ref:`ref-request-callback-copy`.
 
 .. method:: Request.httprepr()
 
    Return a string with the raw HTTP representation of this response.
 
-.. _ref-request-callbacks:
+.. _ref-request-callback-copy:
+
+Copying Requests and callbacks
+------------------------------
+
+When you copy a request using the :meth:`Request.copy` or
+:meth:`Request.replace` methods, the callback of the request is not copied by
+default. This is due to legacy reasons and to limitations in the underlying
+network library, which doesn't allow sharing `Twisted deferreds`_.
+
+.. _Twisted deferreds: http://twistedmatrix.com/projects/core/documentation/howto/defer.html
+
+For example::
+
+    request = Request("http://www.example.com", callback=myfunc)
+    request2 = request.copy() # doesn't copy the callback
+    request3 = request.replace(callback=request.callback)
+
+In the above example, ``request2`` is a copy of ``request`` but it has no
+callback, while ``request3`` is a copy of ``request`` and also contains the
+callback.
+
+.. _ref-request-callback-arguments:
 
 Passing arguments to callback functions
 ---------------------------------------
 
diff --git a/scrapy/trunk/scrapy/http/request/__init__.py b/scrapy/trunk/scrapy/http/request/__init__.py
index 2377daa2c..e23f77280 100644
--- a/scrapy/trunk/scrapy/http/request/__init__.py
+++ b/scrapy/trunk/scrapy/http/request/__init__.py
@@ -18,7 +18,7 @@ from scrapy.utils.defer import chain_deferred
 
 class Request(object):
 
     def __init__(self, url, callback=None, method='GET', headers=None, body=None,
-                 cookies=None, meta=None, encoding='utf-8', dont_filter=None):
+                 cookies=None, meta=None, encoding='utf-8', dont_filter=False):
         self._encoding = encoding # this one has to be set first
         self.method = method.upper()
@@ -37,10 +37,13 @@ class Request(object):
         self.cache = {}
 
     def set_url(self, url):
-        assert isinstance(url, basestring), \
-            'Request url argument must be str or unicode, got %s:' % type(url).__name__
-        decoded_url = url if isinstance(url, unicode) else url.decode(self.encoding)
-        self._url = Url(safe_url_string(decoded_url, self.encoding))
+        if isinstance(url, basestring):
+            decoded_url = url if isinstance(url, unicode) else url.decode(self.encoding)
+            self._url = Url(safe_url_string(decoded_url, self.encoding))
+        elif isinstance(url, Url):
+            self._url = url
+        else:
+            raise TypeError('Request url must be str, unicode or Url, got %s' % type(url).__name__)
     url = property(lambda x: x._url, set_url)
 
     def set_body(self, body):
@@ -76,26 +79,23 @@ class Request(object):
         return "%s(%s)" % (self.__class__.__name__, repr(d))
 
     def copy(self):
-        """Return a copy a of this Request"""
-        new = copy.copy(self)
-        new.cache = {}
-        for att in self.__dict__:
-            if att not in ['cache', 'url', 'deferred']:
-                value = getattr(self, att)
-                setattr(new, att, copy.copy(value))
-        new.deferred = defer.Deferred()
-        return new
-
-    def replace(self, url=None, method=None, headers=None, body=None, cookies=None, meta=None):
+        """Return a copy of this Request"""
+        return self.replace()
+
+    def replace(self, url=None, callback=None, method=None, headers=None, body=None,
+                cookies=None, meta=None, encoding=None, dont_filter=None):
         """Create a new Request with the same attributes except for those
         given new values.
         """
-        return self.__class__(url=url or self.url,
-                              method=method or self.method,
-                              headers=headers or copy.deepcopy(self.headers),
-                              body=body or self.body,
-                              cookies=cookies or self.cookies,
-                              meta=meta or self.meta)
+        return self.__class__(url=self.url if url is None else url,
+                              callback=callback,
+                              method=self.method if method is None else method,
+                              headers=copy.deepcopy(self.headers) if headers is None else headers,
+                              body=self.body if body is None else body,
+                              cookies=self.cookies if cookies is None else cookies,
+                              meta=self.meta if meta is None else meta,
+                              encoding=self.encoding if encoding is None else encoding,
+                              dont_filter=self.dont_filter if dont_filter is None else dont_filter)
 
     def httprepr(self):
         """ Return raw HTTP request representation (as string). This is
diff --git a/scrapy/trunk/scrapy/http/response/__init__.py b/scrapy/trunk/scrapy/http/response/__init__.py
index 98ff3e47a..830a8dff7 100644
--- a/scrapy/trunk/scrapy/http/response/__init__.py
+++ b/scrapy/trunk/scrapy/http/response/__init__.py
@@ -55,12 +55,12 @@ class Response(object):
         """
         if cls is None:
             cls = self.__class__
-        new = cls(url=url or self.url,
-                  status=status or self.status,
-                  headers=headers or copy.deepcopy(self.headers),
-                  body=body or self.body,
-                  meta=meta or self.meta,
-                  flags=flags or self.flags,
+        new = cls(url=self.url if url is None else url,
+                  status=self.status if status is None else status,
+                  headers=copy.deepcopy(self.headers) if headers is None else headers,
+                  body=self.body if body is None else body,
+                  meta=self.meta if meta is None else meta,
+                  flags=self.flags if flags is None else flags,
                   **kwargs)
         return new
 
diff --git a/scrapy/trunk/scrapy/tests/test_http_request.py b/scrapy/trunk/scrapy/tests/test_http_request.py
index 6a766c214..0b658ff01 100644
--- a/scrapy/trunk/scrapy/tests/test_http_request.py
+++ b/scrapy/trunk/scrapy/tests/test_http_request.py
@@ -7,6 +7,10 @@ class RequestTest(unittest.TestCase):
         # Request requires url in the constructor
         self.assertRaises(Exception, Request)
 
+        # url argument must be basestring or Url
+        self.assertRaises(TypeError, Request, 123)
+        r = Request(Url('http://www.example.com'))
+
         r = Request("http://www.example.com")
         assert isinstance(r.url, Url)
         self.assertEqual(r.url, "http://www.example.com")
@@ -124,6 +128,8 @@ class RequestTest(unittest.TestCase):
         # make sure headers attribute is shallow copied
         assert r1.headers is not r2.headers, "headers must be a shallow copy, not identical"
         self.assertEqual(r1.headers, r2.headers)
+        self.assertEqual(r1.encoding, r2.encoding)
+        self.assertEqual(r1.dont_filter, r2.dont_filter)
 
         # Request.body can be identical since it's an immutable object (str)
 
@@ -148,6 +154,13 @@ class RequestTest(unittest.TestCase):
         self.assertEqual((r1.body, r2.body), ('', "New body"))
         self.assertEqual((r1.headers, r2.headers), ({}, hdrs))
 
+        # Empty attributes (which may fail if not compared properly)
+        r3 = Request("http://www.example.com", meta={'a': 1}, dont_filter=True)
+        r4 = r3.replace(body='', meta={}, dont_filter=False)
+        self.assertEqual(r4.body, '')
+        self.assertEqual(r4.meta, {})
+        assert r4.dont_filter is False
+
     def test_httprepr(self):
         r1 = Request("http://www.example.com")
         self.assertEqual(r1.httprepr(), 'GET http://www.example.com HTTP/1.1\r\nHost: www.example.com\r\n\r\n')
diff --git a/scrapy/trunk/scrapy/tests/test_http_response.py b/scrapy/trunk/scrapy/tests/test_http_response.py
index 61062a824..1737a7690 100644
--- a/scrapy/trunk/scrapy/tests/test_http_response.py
+++ b/scrapy/trunk/scrapy/tests/test_http_response.py
@@ -97,6 +97,13 @@ class ResponseTest(unittest.TestCase):
         self.assertEqual(r3.url, "http://www.example.com/other")
         self.assertEqual(r3.encoding, "latin1")
 
+        # Empty attributes (which may fail if not compared properly)
+        r3 = Response("http://www.example.com", meta={'a': 1}, flags=['cached'])
+        r4 = r3.replace(body='', meta={}, flags=[])
+        self.assertEqual(r4.body, '')
+        self.assertEqual(r4.meta, {})
+        self.assertEqual(r4.flags, [])
+
     def test_httprepr(self):
         r1 = Response("http://www.example.com")
         self.assertEqual(r1.httprepr(), 'HTTP/1.1 200 OK\r\n\r\n')
--
GitLab
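
The behavioural core of this patch is that ``replace()`` now treats ``None`` as "keep the
current value" instead of relying on truthiness, so explicitly passed empty values
(``body=''``, ``meta={}``, ``dont_filter=False``) are honoured rather than silently replaced
by the old ones. The following minimal sketch contrasts the old ``or``-based style with the
patched ``is None`` style; the ``Item`` class and its ``replace_old``/``replace_new`` methods
are invented for illustration only and are not Scrapy code::

    class Item(object):
        """Toy stand-in for Request/Response, used only to show the pattern."""

        def __init__(self, body='', meta=None, dont_filter=False):
            self.body = body
            self.meta = meta if meta is not None else {}
            self.dont_filter = dont_filter

        def replace_old(self, body=None, meta=None, dont_filter=None):
            # Old 'or'-based style: falsy replacements ('', {}, False) fall
            # back to the current value, so they are silently lost.
            return Item(body=body or self.body,
                        meta=meta or self.meta,
                        dont_filter=dont_filter or self.dont_filter)

        def replace_new(self, body=None, meta=None, dont_filter=None):
            # Patched style: only None means "keep the current value".
            return Item(body=self.body if body is None else body,
                        meta=self.meta if meta is None else meta,
                        dont_filter=self.dont_filter if dont_filter is None else dont_filter)

    old = Item(body='payload', meta={'a': 1}, dont_filter=True)
    broken = old.replace_old(body='', meta={}, dont_filter=False)
    fixed = old.replace_new(body='', meta={}, dont_filter=False)
    assert broken.body == 'payload'    # empty body ignored: the bug
    assert fixed.body == ''            # empty body honoured: the fix
    assert fixed.meta == {} and fixed.dont_filter is False

This is the same situation exercised by the new test cases in ``test_http_request.py`` and
``test_http_response.py``, which replace attributes with empty values and check that those
empty values are preserved.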