From 94e28adfb7049be8ec0e9ce00bab1ab3a0b25301 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov
Date: Tue, 1 Mar 2016 16:29:12 +0500
Subject: [PATCH] Extract a function to build CONNECT request; add tests for it. See GH-1701 and GH-1808.

---
 scrapy/core/downloader/handlers/http11.py | 34 +++++++++++++++++------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py
index 5261f542b..c93a48597 100644
--- a/scrapy/core/downloader/handlers/http11.py
+++ b/scrapy/core/downloader/handlers/http11.py
@@ -107,15 +107,8 @@ class TunnelingTCP4ClientEndpoint(TCP4ClientEndpoint):

     def requestTunnel(self, protocol):
         """Asks the proxy to open a tunnel."""
-        tunnelReq = (
-            b'CONNECT ' +
-            to_bytes(self._tunneledHost, encoding='ascii') + b':' +
-            to_bytes(str(self._tunneledPort)) +
-            b' HTTP/1.1\r\n')
-        if self._proxyAuthHeader:
-            tunnelReq += \
-                b'Proxy-Authorization: ' + self._proxyAuthHeader + b'\r\n'
-        tunnelReq += b'\r\n'
+        tunnelReq = tunnel_request_data(self._tunneledHost, self._tunneledPort,
+                                        self._proxyAuthHeader)
         protocol.transport.write(tunnelReq)
         self._protocolDataReceived = protocol.dataReceived
         protocol.dataReceived = self.processProxyResponse
@@ -149,6 +142,29 @@ class TunnelingTCP4ClientEndpoint(TCP4ClientEndpoint):
         return self._tunnelReadyDeferred


+def tunnel_request_data(host, port, proxy_auth_header=None):
+    r"""
+    Return binary content of a CONNECT request.
+
+    >>> from scrapy.utils.python import to_native_str as s
+    >>> s(tunnel_request_data("example.com", 8080))
+    'CONNECT example.com:8080 HTTP/1.1\r\n\r\n'
+    >>> s(tunnel_request_data("example.com", 8080, b"123"))
+    'CONNECT example.com:8080 HTTP/1.1\r\nProxy-Authorization: 123\r\n\r\n'
+    >>> s(tunnel_request_data(b"example.com", "8090"))
+    'CONNECT example.com:8090 HTTP/1.1\r\n\r\n'
+    """
+    tunnel_req = (
+        b'CONNECT ' +
+        to_bytes(host, encoding='ascii') + b':' +
+        to_bytes(str(port)) +
+        b' HTTP/1.1\r\n')
+    if proxy_auth_header:
+        tunnel_req += b'Proxy-Authorization: ' + proxy_auth_header + b'\r\n'
+    tunnel_req += b'\r\n'
+    return tunnel_req
+
+
 class TunnelingAgent(Agent):
     """An agent that uses a L{TunnelingTCP4ClientEndpoint} to make HTTPS
     downloads. It may look strange that we have chosen to subclass Agent and not
-- 
GitLab