提交 f7a48b0c 编写于 作者: M Mikhail Korobov

Merge pull request #1794 from redapple/twisted-tls

[MRG+1] Use best practices for TLS connections when using Twisted>=14.0
from OpenSSL import SSL
from twisted.internet.ssl import ClientContextFactory
try:
# available since twisted 14.0
from zope.interface.declarations import implementer
# the following should be available from Twisted 14.0.0
from twisted.internet.ssl import optionsForClientTLS, CertificateOptions, platformTrust
from twisted.internet._sslverify import ClientTLSOptions
from twisted.web.client import BrowserLikePolicyForHTTPS
from twisted.web.iweb import IPolicyForHTTPS
@implementer(IPolicyForHTTPS)
class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
"""
Non-peer-certificate verifying HTTPS context factory
Default OpenSSL method is TLS_METHOD (also called SSLv23_METHOD)
which allows TLS protocol negotiation
'A TLS/SSL connection established with [this method] may
understand the SSLv3, TLSv1, TLSv1.1 and TLSv1.2 protocols.'
"""
def __init__(self, method=SSL.SSLv23_METHOD, *args, **kwargs):
super(ScrapyClientContextFactory, self).__init__(*args, **kwargs)
self._ssl_method = method
def getCertificateOptions(self):
# setting verify=True will require you to provide CAs
# to verify against; in other words: it's not that simple
# backward-compatible SSL/TLS method:
#
# * this will respect `method` attribute in often recommended
# `ScrapyClientContextFactory` subclass
# (https://github.com/scrapy/scrapy/issues/1429#issuecomment-131782133)
#
# * getattr() for `_ssl_method` attribute for context factories
# not calling super(..., self).__init__
return CertificateOptions(verify=False,
method=getattr(self, 'method',
getattr(self, '_ssl_method', None)))
# kept for old-style HTTP/1.0 downloader context twisted calls,
# e.g. connectSSL()
def getContext(self, hostname=None, port=None):
return self.getCertificateOptions().getContext()
def creatorForNetloc(self, hostname, port):
return ClientTLSOptions(hostname.decode("ascii"), self.getContext())
@implementer(IPolicyForHTTPS)
class BrowserLikeContextFactory(ScrapyClientContextFactory):
"""
Twisted-recommended context factory for web clients.
Quoting http://twistedmatrix.com/documents/current/api/twisted.web.client.Agent.html:
"The default is to use a BrowserLikePolicyForHTTPS,
so unless you have special requirements you can leave this as-is."
creatorForNetloc() is the same as BrowserLikePolicyForHTTPS
except this context factory allows setting the TLS/SSL method to use.
Default OpenSSL method is TLS_METHOD (also called SSLv23_METHOD)
which allows TLS protocol negotiation.
"""
def creatorForNetloc(self, hostname, port):
# trustRoot set to platformTrust() will use the platform's root CAs.
#
# This means that a website like https://www.cacert.org will be rejected
# by default, since CAcert.org CA certificate is seldom shipped.
return optionsForClientTLS(hostname.decode("ascii"),
trustRoot=platformTrust(),
extraCertificateOptions={
'method': self._ssl_method,
})
except ImportError:
ClientTLSOptions = None
class ScrapyClientContextFactory(ClientContextFactory):
"A SSL context factory which is more permissive against SSL bugs."
# see https://github.com/scrapy/scrapy/issues/82
# and https://github.com/scrapy/scrapy/issues/26
# and https://github.com/scrapy/scrapy/issues/981
def __init__(self):
# see this issue on why we use TLSv1_METHOD by default
# https://github.com/scrapy/scrapy/issues/194
self.method = SSL.TLSv1_METHOD
def getContext(self, hostname=None, port=None):
ctx = ClientContextFactory.getContext(self)
# Enable all workarounds to SSL bugs as documented by
# http://www.openssl.org/docs/ssl/SSL_CTX_set_options.html
ctx.set_options(SSL.OP_ALL)
if hostname and ClientTLSOptions is not None: # workaround for TLS SNI
ClientTLSOptions(hostname, ctx)
return ctx
class ScrapyClientContextFactory(ClientContextFactory):
"A SSL context factory which is more permissive against SSL bugs."
# see https://github.com/scrapy/scrapy/issues/82
# and https://github.com/scrapy/scrapy/issues/26
# and https://github.com/scrapy/scrapy/issues/981
def __init__(self, method=SSL.SSLv23_METHOD):
self.method = method
def getContext(self, hostname=None, port=None):
ctx = ClientContextFactory.getContext(self)
# Enable all workarounds to SSL bugs as documented by
# http://www.openssl.org/docs/ssl/SSL_CTX_set_options.html
ctx.set_options(SSL.OP_ALL)
return ctx
......@@ -4,6 +4,7 @@ import re
import logging
from io import BytesIO
from time import time
import warnings
from six.moves.urllib.parse import urldefrag
from zope.interface import implementer
......@@ -18,6 +19,7 @@ from scrapy.xlib.tx import Agent, ProxyAgent, ResponseDone, \
from scrapy.http import Headers
from scrapy.responsetypes import responsetypes
from scrapy.core.downloader.webclient import _parse
from scrapy.core.downloader.tls import openssl_methods
from scrapy.utils.misc import load_object
from scrapy.utils.python import to_bytes, to_unicode
from scrapy import twisted_version
......@@ -31,8 +33,21 @@ class HTTP11DownloadHandler(object):
self._pool = HTTPConnectionPool(reactor, persistent=True)
self._pool.maxPersistentPerHost = settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
self._pool._factory.noisy = False
self._sslMethod = openssl_methods[settings.get('DOWNLOADER_CLIENT_TLS_METHOD')]
self._contextFactoryClass = load_object(settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
self._contextFactory = self._contextFactoryClass()
# try method-aware context factory
try:
self._contextFactory = self._contextFactoryClass(method=self._sslMethod)
except TypeError:
# use context factory defaults
self._contextFactory = self._contextFactoryClass()
msg = """
'%s' does not accept `method` argument (type OpenSSL.SSL method,\
e.g. OpenSSL.SSL.SSLv23_METHOD).\
Please upgrade your context factory class to handle it or ignore it.""" % (
settings['DOWNLOADER_CLIENTCONTEXTFACTORY'],)
warnings.warn(msg)
self._default_maxsize = settings.getint('DOWNLOAD_MAXSIZE')
self._default_warnsize = settings.getint('DOWNLOAD_WARNSIZE')
self._disconnect_timeout = 1
......
from OpenSSL import SSL
METHOD_SSLv3 = 'SSLv3'
METHOD_TLS = 'TLS'
METHOD_TLSv10 = 'TLSv1.0'
METHOD_TLSv11 = 'TLSv1.1'
METHOD_TLSv12 = 'TLSv1.2'
openssl_methods = {
METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended)
METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended)
METHOD_TLSv10: SSL.TLSv1_METHOD, # TLS 1.0 only
METHOD_TLSv11: getattr(SSL, 'TLSv1_1_METHOD', 5), # TLS 1.1 only
METHOD_TLSv12: getattr(SSL, 'TLSv1_2_METHOD', 6), # TLS 1.2 only
}
......@@ -83,6 +83,8 @@ DOWNLOADER = 'scrapy.core.downloader.Downloader'
DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory'
DOWNLOADER_CLIENTCONTEXTFACTORY = 'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory'
DOWNLOADER_CLIENT_TLS_METHOD = 'TLS' # Use highest TLS/SSL protocol version supported by the platform,
# also allowing negotiation
DOWNLOADER_MIDDLEWARES = {}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册