From 9d9d83a8c31b6a18d7aaac35a30ffb69db4bb81d Mon Sep 17 00:00:00 2001 From: Aditya Date: Sat, 28 Oct 2017 16:24:40 +0530 Subject: [PATCH] Use https links wherever possible --- CONTRIBUTING.md | 4 ++-- INSTALL | 2 +- README.rst | 14 ++++++------- debian/control | 6 +++--- debian/copyright | 8 ++++---- docs/contributing.rst | 2 +- docs/intro/overview.rst | 2 +- docs/topics/practices.rst | 2 +- docs/topics/selectors.rst | 4 ++-- docs/topics/shell.rst | 8 ++++---- scrapy/_monkeypatches.py | 4 ++-- scrapy/core/downloader/contextfactory.py | 4 ++-- scrapy/crawler.py | 2 +- scrapy/downloadermiddlewares/chunked.py | 2 +- scrapy/downloadermiddlewares/httpcache.py | 2 +- scrapy/exporters.py | 2 +- scrapy/extensions/httpcache.py | 10 +++++----- scrapy/extensions/telnet.py | 2 +- scrapy/pipelines/files.py | 4 ++-- scrapy/signalmanager.py | 2 +- scrapy/templates/project/module/items.py.tmpl | 2 +- .../project/module/middlewares.py.tmpl | 2 +- .../project/module/pipelines.py.tmpl | 2 +- .../templates/project/module/settings.py.tmpl | 20 +++++++++---------- scrapy/utils/defer.py | 2 +- scrapy/utils/deprecate.py | 12 +++++------ scrapy/utils/http.py | 2 +- scrapy/utils/log.py | 2 +- scrapy/utils/url.py | 2 +- sep/sep-001.rst | 2 +- sep/sep-006.rst | 4 ++-- sep/sep-013.rst | 2 +- sep/sep-017.rst | 2 +- sep/sep-020.rst | 2 +- setup.py | 2 +- tests/__init__.py | 2 +- tests/keys/example-com.conf | 4 ++-- 37 files changed, 76 insertions(+), 76 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 88c472f6f..0a11b05d2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ The guidelines for contributing are available here: -http://doc.scrapy.org/en/master/contributing.html +https://doc.scrapy.org/en/master/contributing.html Please do not abuse the issue tracker for support questions. If your issue topic can be rephrased to "How to ...?", please use the -support channels to get it answered: http://scrapy.org/community/ +support channels to get it answered: https://scrapy.org/community/ diff --git a/INSTALL b/INSTALL index 84803a933..a3c7899c6 100644 --- a/INSTALL +++ b/INSTALL @@ -1,4 +1,4 @@ For information about installing Scrapy see: * docs/intro/install.rst (local file) -* http://doc.scrapy.org/en/latest/intro/install.html (online version) +* https://doc.scrapy.org/en/latest/intro/install.html (online version) diff --git a/README.rst b/README.rst index da63f2b93..45135c7a2 100644 --- a/README.rst +++ b/README.rst @@ -31,7 +31,7 @@ crawl websites and extract structured data from their pages. It can be used for a wide range of purposes, from data mining to monitoring and automated testing. For more information including a list of features check the Scrapy homepage at: -http://scrapy.org +https://scrapy.org Requirements ============ @@ -47,12 +47,12 @@ The quick way:: pip install scrapy For more details see the install section in the documentation: -http://doc.scrapy.org/en/latest/intro/install.html +https://doc.scrapy.org/en/latest/intro/install.html Documentation ============= -Documentation is available online at http://doc.scrapy.org/ and in the ``docs`` +Documentation is available online at https://doc.scrapy.org/ and in the ``docs`` directory. 
Releases @@ -63,12 +63,12 @@ You can find release notes at https://doc.scrapy.org/en/latest/news.html Community (blog, twitter, mail list, IRC) ========================================= -See http://scrapy.org/community/ +See https://scrapy.org/community/ Contributing ============ -See http://doc.scrapy.org/en/master/contributing.html +See https://doc.scrapy.org/en/master/contributing.html Code of Conduct --------------- @@ -82,9 +82,9 @@ Please report unacceptable behavior to opensource@scrapinghub.com. Companies using Scrapy ====================== -See http://scrapy.org/companies/ +See https://scrapy.org/companies/ Commercial Support ================== -See http://scrapy.org/support/ +See https://scrapy.org/support/ diff --git a/debian/control b/debian/control index f3a31753b..2cc8eedf4 100644 --- a/debian/control +++ b/debian/control @@ -4,7 +4,7 @@ Priority: optional Maintainer: Scrapinghub Team Build-Depends: debhelper (>= 7.0.50), python (>=2.7), python-twisted, python-w3lib, python-lxml, python-six (>=1.5.2) Standards-Version: 3.8.4 -Homepage: http://scrapy.org/ +Homepage: https://scrapy.org/ Package: scrapy Architecture: all @@ -15,6 +15,6 @@ Conflicts: python-scrapy, scrapy-0.25 Provides: python-scrapy, scrapy-0.25 Description: Python web crawling and web scraping framework Scrapy is a fast high-level web crawling and web scraping framework, - used to crawl websites and extract structured data from their pages. - It can be used for a wide range of purposes, from data mining to + used to crawl websites and extract structured data from their pages. + It can be used for a wide range of purposes, from data mining to monitoring and automated testing. diff --git a/debian/copyright b/debian/copyright index 4cc239002..c1bf47565 100644 --- a/debian/copyright +++ b/debian/copyright @@ -1,6 +1,6 @@ This package was debianized by the Scrapinghub team . -It was downloaded from http://scrapy.org +It was downloaded from https://scrapy.org Upstream Author: Scrapy Developers @@ -14,10 +14,10 @@ All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - 1. Redistributions of source code must retain the above copyright notice, + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. diff --git a/docs/contributing.rst b/docs/contributing.rst index 291a1054e..f3732ab06 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -7,7 +7,7 @@ Contributing to Scrapy .. important:: Double check you are reading the most recent version of this document at - http://doc.scrapy.org/en/master/contributing.html + https://doc.scrapy.org/en/master/contributing.html There are many ways to contribute to Scrapy. Here are some of them: diff --git a/docs/intro/overview.rst b/docs/intro/overview.rst index d0ce07a8e..6f1c2c43f 100644 --- a/docs/intro/overview.rst +++ b/docs/intro/overview.rst @@ -160,7 +160,7 @@ The next steps for you are to :ref:`install Scrapy `, a full-blown Scrapy project and `join the community`_. Thanks for your interest! -.. _join the community: http://scrapy.org/community/ +.. _join the community: https://scrapy.org/community/ .. 
_web scraping: https://en.wikipedia.org/wiki/Web_scraping .. _Amazon Associates Web Services: https://affiliate-program.amazon.com/gp/advertising/api/detail/main.html .. _Amazon S3: https://aws.amazon.com/s3/ diff --git a/docs/topics/practices.rst b/docs/topics/practices.rst index e0dd4000f..02cfa9b05 100644 --- a/docs/topics/practices.rst +++ b/docs/topics/practices.rst @@ -248,7 +248,7 @@ If you are still unable to prevent your bot getting banned, consider contacting `commercial support`_. .. _Tor project: https://www.torproject.org/ -.. _commercial support: http://scrapy.org/support/ +.. _commercial support: https://scrapy.org/support/ .. _ProxyMesh: https://proxymesh.com/ .. _Google cache: http://www.googleguide.com/cached_pages.html .. _testspiders: https://github.com/scrapinghub/testspiders diff --git a/docs/topics/selectors.rst b/docs/topics/selectors.rst index cb4c25391..8ac40c3cc 100644 --- a/docs/topics/selectors.rst +++ b/docs/topics/selectors.rst @@ -86,7 +86,7 @@ To explain how to use the selectors we'll use the `Scrapy shell` (which provides interactive testing) and an example page located in the Scrapy documentation server: - http://doc.scrapy.org/en/latest/_static/selectors-sample1.html + https://doc.scrapy.org/en/latest/_static/selectors-sample1.html .. _topics-selectors-htmlcode: @@ -99,7 +99,7 @@ Here's its HTML code: First, let's open the shell:: - scrapy shell http://doc.scrapy.org/en/latest/_static/selectors-sample1.html + scrapy shell https://doc.scrapy.org/en/latest/_static/selectors-sample1.html Then, after the shell loads, you'll have the response available as ``response`` shell variable, and its attached selector in ``response.selector`` attribute. diff --git a/docs/topics/shell.rst b/docs/topics/shell.rst index 527116418..11ab199f2 100644 --- a/docs/topics/shell.rst +++ b/docs/topics/shell.rst @@ -142,7 +142,7 @@ Example of shell session ======================== Here's an example of a typical shell session where we start by scraping the -http://scrapy.org page, and then proceed to scrape the https://reddit.com +https://scrapy.org page, and then proceed to scrape the https://reddit.com page. Finally, we modify the (Reddit) request method to POST and re-fetch it getting an error. We end the session by typing Ctrl-D (in Unix systems) or Ctrl-Z in Windows. @@ -154,7 +154,7 @@ shell works. 
First, we launch the shell:: - scrapy shell 'http://scrapy.org' --nolog + scrapy shell 'https://scrapy.org' --nolog Then, the shell fetches the URL (using the Scrapy downloader) and prints the list of available objects and useful shortcuts (you'll notice that these lines @@ -164,7 +164,7 @@ all start with the ``[s]`` prefix):: [s] scrapy scrapy module (contains scrapy.Request, scrapy.Selector, etc) [s] crawler [s] item {} - [s] request + [s] request [s] response <200 https://scrapy.org/> [s] settings [s] spider @@ -182,7 +182,7 @@ After that, we can start playing with the objects:: >>> response.xpath('//title/text()').extract_first() 'Scrapy | A Fast and Powerful Scraping and Web Crawling Framework' - >>> fetch("http://reddit.com") + >>> fetch("https://reddit.com") >>> response.xpath('//title/text()').extract() ['reddit: the front page of the internet'] diff --git a/scrapy/_monkeypatches.py b/scrapy/_monkeypatches.py index 60e0de1f2..f55ecc213 100644 --- a/scrapy/_monkeypatches.py +++ b/scrapy/_monkeypatches.py @@ -4,12 +4,12 @@ from six.moves import copyreg if sys.version_info[0] == 2: from urlparse import urlparse - # workaround for http://bugs.python.org/issue7904 - Python < 2.7 + # workaround for https://bugs.python.org/issue7904 - Python < 2.7 if urlparse('s3://bucket/key').netloc != 'bucket': from urlparse import uses_netloc uses_netloc.append('s3') - # workaround for http://bugs.python.org/issue9374 - Python < 2.7.4 + # workaround for https://bugs.python.org/issue9374 - Python < 2.7.4 if urlparse('s3://bucket/key?key=value').query != 'key=value': from urlparse import uses_query uses_query.append('s3') diff --git a/scrapy/core/downloader/contextfactory.py b/scrapy/core/downloader/contextfactory.py index a94a89205..783d4c383 100644 --- a/scrapy/core/downloader/contextfactory.py +++ b/scrapy/core/downloader/contextfactory.py @@ -64,7 +64,7 @@ if twisted_version >= (14, 0, 0): """ Twisted-recommended context factory for web clients. - Quoting http://twistedmatrix.com/documents/current/api/twisted.web.client.Agent.html: + Quoting https://twistedmatrix.com/documents/current/api/twisted.web.client.Agent.html: "The default is to use a BrowserLikePolicyForHTTPS, so unless you have special requirements you can leave this as-is." @@ -100,6 +100,6 @@ else: def getContext(self, hostname=None, port=None): ctx = ClientContextFactory.getContext(self) # Enable all workarounds to SSL bugs as documented by - # http://www.openssl.org/docs/ssl/SSL_CTX_set_options.html + # https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_options.html ctx.set_options(SSL.OP_ALL) return ctx diff --git a/scrapy/crawler.py b/scrapy/crawler.py index a33ce9805..5cbc2d7c5 100644 --- a/scrapy/crawler.py +++ b/scrapy/crawler.py @@ -83,7 +83,7 @@ class Crawler(object): yield defer.maybeDeferred(self.engine.start) except Exception: # In Python 2 reraising an exception after yield discards - # the original traceback (see http://bugs.python.org/issue7563), + # the original traceback (see https://bugs.python.org/issue7563), # so sys.exc_info() workaround is used. # This workaround also works in Python 3, but it is not needed, # and it is slower, so in Python 3 we use native `raise`. 
diff --git a/scrapy/downloadermiddlewares/chunked.py b/scrapy/downloadermiddlewares/chunked.py index 64d94c489..6748d0265 100644 --- a/scrapy/downloadermiddlewares/chunked.py +++ b/scrapy/downloadermiddlewares/chunked.py @@ -11,7 +11,7 @@ warnings.warn("Module `scrapy.downloadermiddlewares.chunked` is deprecated, " class ChunkedTransferMiddleware(object): """This middleware adds support for chunked transfer encoding, as - documented in: http://en.wikipedia.org/wiki/Chunked_transfer_encoding + documented in: https://en.wikipedia.org/wiki/Chunked_transfer_encoding """ def process_response(self, request, response, spider): diff --git a/scrapy/downloadermiddlewares/httpcache.py b/scrapy/downloadermiddlewares/httpcache.py index 30e49b886..495b103d1 100644 --- a/scrapy/downloadermiddlewares/httpcache.py +++ b/scrapy/downloadermiddlewares/httpcache.py @@ -75,7 +75,7 @@ class HttpCacheMiddleware(object): return response # RFC2616 requires origin server to set Date header, - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.18 + # https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.18 if 'Date' not in response.headers: response.headers['Date'] = formatdate(usegmt=1) diff --git a/scrapy/exporters.py b/scrapy/exporters.py index e2d42b6ab..07f43b494 100644 --- a/scrapy/exporters.py +++ b/scrapy/exporters.py @@ -188,7 +188,7 @@ class XmlItemExporter(BaseItemExporter): self.xg.endElement(name) self._beautify_newline() - # Workaround for http://bugs.python.org/issue17606 + # Workaround for https://bugs.python.org/issue17606 # Before Python 2.7.4 xml.sax.saxutils required bytes; # since 2.7.4 it requires unicode. The bug is likely to be # fixed in 2.7.6, but 2.7.6 will still support unicode, diff --git a/scrapy/extensions/httpcache.py b/scrapy/extensions/httpcache.py index 648b32ec7..1b5e05b1b 100644 --- a/scrapy/extensions/httpcache.py +++ b/scrapy/extensions/httpcache.py @@ -70,8 +70,8 @@ class RFC2616Policy(object): return True def should_cache_response(self, response, request): - # What is cacheable - http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec14.9.1 - # Response cacheability - http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.4 + # What is cacheable - https://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec14.9.1 + # Response cacheability - https://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.4 # Status code 206 is not included because cache can not deal with partial contents cc = self._parse_cachecontrol(response) # obey directive "Cache-Control: no-store" @@ -163,7 +163,7 @@ class RFC2616Policy(object): def _compute_freshness_lifetime(self, response, request, now): # Reference nsHttpResponseHead::ComputeFreshnessLifetime - # http://dxr.mozilla.org/mozilla-central/source/netwerk/protocol/http/nsHttpResponseHead.cpp#410 + # https://dxr.mozilla.org/mozilla-central/source/netwerk/protocol/http/nsHttpResponseHead.cpp#706 cc = self._parse_cachecontrol(response) maxage = self._get_max_age(cc) if maxage is not None: @@ -194,7 +194,7 @@ class RFC2616Policy(object): def _compute_current_age(self, response, request, now): # Reference nsHttpResponseHead::ComputeCurrentAge - # http://dxr.mozilla.org/mozilla-central/source/netwerk/protocol/http/nsHttpResponseHead.cpp#366 + # https://dxr.mozilla.org/mozilla-central/source/netwerk/protocol/http/nsHttpResponseHead.cpp#658 currentage = 0 # If Date header is not set we assume it is a fast connection, and # clock is in sync with the server @@ -414,7 +414,7 @@ class LeveldbCacheStorage(object): def 
parse_cachecontrol(header): """Parse Cache-Control header - http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9 + https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9 >>> parse_cachecontrol(b'public, max-age=3600') == {b'public': None, ... b'max-age': b'3600'} diff --git a/scrapy/extensions/telnet.py b/scrapy/extensions/telnet.py index d9add1d97..5ca0d19a0 100644 --- a/scrapy/extensions/telnet.py +++ b/scrapy/extensions/telnet.py @@ -82,7 +82,7 @@ class TelnetConsole(protocol.ServerFactory): 'prefs': print_live_refs, 'hpy': hpy, 'help': "This is Scrapy telnet console. For more info see: " \ - "http://doc.scrapy.org/en/latest/topics/telnetconsole.html", + "https://doc.scrapy.org/en/latest/topics/telnetconsole.html", } self.crawler.signals.send_catch_log(update_telnet_vars, telnet_vars=telnet_vars) return telnet_vars diff --git a/scrapy/pipelines/files.py b/scrapy/pipelines/files.py index 7fdb8a086..9f1faa313 100644 --- a/scrapy/pipelines/files.py +++ b/scrapy/pipelines/files.py @@ -120,7 +120,7 @@ class S3FilesStore(object): def _get_boto_bucket(self): # disable ssl (is_secure=False) because of this python bug: - # http://bugs.python.org/issue5103 + # https://bugs.python.org/issue5103 c = self.S3Connection(self.AWS_ACCESS_KEY_ID, self.AWS_SECRET_ACCESS_KEY, is_secure=False) return c.get_bucket(self.bucket, validate=False) @@ -268,7 +268,7 @@ class FilesPipeline(MediaPipeline): def __init__(self, store_uri, download_func=None, settings=None): if not store_uri: raise NotConfigured - + if isinstance(settings, dict) or settings is None: settings = Settings(settings) diff --git a/scrapy/signalmanager.py b/scrapy/signalmanager.py index fd79905e9..296d27ed8 100644 --- a/scrapy/signalmanager.py +++ b/scrapy/signalmanager.py @@ -55,7 +55,7 @@ class SignalManager(object): The keyword arguments are passed to the signal handlers (connected through the :meth:`connect` method). - .. _deferreds: http://twistedmatrix.com/documents/current/core/howto/defer.html + .. 
_deferreds: https://twistedmatrix.com/documents/current/core/howto/defer.html """ kwargs.setdefault('sender', self.sender) return _signal.send_catch_log_deferred(signal, **kwargs) diff --git a/scrapy/templates/project/module/items.py.tmpl b/scrapy/templates/project/module/items.py.tmpl index 2c746138f..7d766f4fc 100644 --- a/scrapy/templates/project/module/items.py.tmpl +++ b/scrapy/templates/project/module/items.py.tmpl @@ -3,7 +3,7 @@ # Define here the models for your scraped items # # See documentation in: -# http://doc.scrapy.org/en/latest/topics/items.html +# https://doc.scrapy.org/en/latest/topics/items.html import scrapy diff --git a/scrapy/templates/project/module/middlewares.py.tmpl b/scrapy/templates/project/module/middlewares.py.tmpl index 1a4b0caa5..c5b542bd6 100644 --- a/scrapy/templates/project/module/middlewares.py.tmpl +++ b/scrapy/templates/project/module/middlewares.py.tmpl @@ -3,7 +3,7 @@ # Define here the models for your spider middleware # # See documentation in: -# http://doc.scrapy.org/en/latest/topics/spider-middleware.html +# https://doc.scrapy.org/en/latest/topics/spider-middleware.html from scrapy import signals diff --git a/scrapy/templates/project/module/pipelines.py.tmpl b/scrapy/templates/project/module/pipelines.py.tmpl index 4e9b32e9e..e58dab089 100644 --- a/scrapy/templates/project/module/pipelines.py.tmpl +++ b/scrapy/templates/project/module/pipelines.py.tmpl @@ -3,7 +3,7 @@ # Define your item pipelines here # # Don't forget to add your pipeline to the ITEM_PIPELINES setting -# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html +# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html class ${ProjectName}Pipeline(object): diff --git a/scrapy/templates/project/module/settings.py.tmpl b/scrapy/templates/project/module/settings.py.tmpl index 35a0f9a45..a0557473e 100644 --- a/scrapy/templates/project/module/settings.py.tmpl +++ b/scrapy/templates/project/module/settings.py.tmpl @@ -5,9 +5,9 @@ # For simplicity, this file contains only settings considered important or # commonly used. 
You can find more settings consulting the documentation: # -# http://doc.scrapy.org/en/latest/topics/settings.html -# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html -# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html +# https://doc.scrapy.org/en/latest/topics/settings.html +# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html +# https://doc.scrapy.org/en/latest/topics/spider-middleware.html BOT_NAME = '$project_name' @@ -25,7 +25,7 @@ ROBOTSTXT_OBEY = True #CONCURRENT_REQUESTS = 32 # Configure a delay for requests for the same website (default: 0) -# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay +# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay # See also autothrottle settings and docs #DOWNLOAD_DELAY = 3 # The download delay setting will honor only one of: @@ -45,31 +45,31 @@ ROBOTSTXT_OBEY = True #} # Enable or disable spider middlewares -# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html +# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html #SPIDER_MIDDLEWARES = { # '$project_name.middlewares.${ProjectName}SpiderMiddleware': 543, #} # Enable or disable downloader middlewares -# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html +# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html #DOWNLOADER_MIDDLEWARES = { # '$project_name.middlewares.${ProjectName}DownloaderMiddleware': 543, #} # Enable or disable extensions -# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html +# See https://doc.scrapy.org/en/latest/topics/extensions.html #EXTENSIONS = { # 'scrapy.extensions.telnet.TelnetConsole': None, #} # Configure item pipelines -# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html +# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html #ITEM_PIPELINES = { # '$project_name.pipelines.${ProjectName}Pipeline': 300, #} # Enable and configure the AutoThrottle extension (disabled by default) -# See http://doc.scrapy.org/en/latest/topics/autothrottle.html +# See https://doc.scrapy.org/en/latest/topics/autothrottle.html #AUTOTHROTTLE_ENABLED = True # The initial download delay #AUTOTHROTTLE_START_DELAY = 5 @@ -82,7 +82,7 @@ ROBOTSTXT_OBEY = True #AUTOTHROTTLE_DEBUG = False # Enable and configure HTTP caching (disabled by default) -# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings +# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings #HTTPCACHE_ENABLED = True #HTTPCACHE_EXPIRATION_SECS = 0 #HTTPCACHE_DIR = 'httpcache' diff --git a/scrapy/utils/defer.py b/scrapy/utils/defer.py index bb4c74a6e..aa6dcffda 100644 --- a/scrapy/utils/defer.py +++ b/scrapy/utils/defer.py @@ -57,7 +57,7 @@ def parallel(iterable, count, callable, *args, **named): """Execute a callable over the objects in the given iterable, in parallel, using no more than ``count`` concurrent calls. 
- Taken from: http://jcalderone.livejournal.com/24285.html + Taken from: https://jcalderone.livejournal.com/24285.html """ coop = task.Cooperator() work = (callable(elem, *args, **named) for elem in iterable) diff --git a/scrapy/utils/deprecate.py b/scrapy/utils/deprecate.py index 7ab39c97e..f76161a68 100644 --- a/scrapy/utils/deprecate.py +++ b/scrapy/utils/deprecate.py @@ -71,8 +71,8 @@ def create_deprecated_class(name, new_class, clsdict=None, warnings.warn(msg, warn_category, stacklevel=2) super(DeprecatedClass, cls).__init__(name, bases, clsdict_) - # see http://www.python.org/dev/peps/pep-3119/#overloading-isinstance-and-issubclass - # and http://docs.python.org/2/reference/datamodel.html#customizing-instance-and-subclass-checks + # see https://www.python.org/dev/peps/pep-3119/#overloading-isinstance-and-issubclass + # and https://docs.python.org/reference/datamodel.html#customizing-instance-and-subclass-checks # for implementation details def __instancecheck__(cls, inst): return any(cls.__subclasscheck__(c) @@ -159,10 +159,10 @@ def update_classpath(path): def method_is_overridden(subclass, base_class, method_name): - """ - Return True if a method named ``method_name`` of a ``base_class`` - is overridden in a ``subclass``. - + """ + Return True if a method named ``method_name`` of a ``base_class`` + is overridden in a ``subclass``. + >>> class Base(object): ... def foo(self): ... pass diff --git a/scrapy/utils/http.py b/scrapy/utils/http.py index 8b659a22a..7cc8d1884 100644 --- a/scrapy/utils/http.py +++ b/scrapy/utils/http.py @@ -11,7 +11,7 @@ def decode_chunked_transfer(chunked_body): decoded body. For more info see: - http://en.wikipedia.org/wiki/Chunked_transfer_encoding + https://en.wikipedia.org/wiki/Chunked_transfer_encoding """ body, h, t = '', '', chunked_body diff --git a/scrapy/utils/log.py b/scrapy/utils/log.py index 7c95e1e50..828880709 100644 --- a/scrapy/utils/log.py +++ b/scrapy/utils/log.py @@ -154,7 +154,7 @@ class StreamLogger(object): """Fake file-like stream object that redirects writes to a logger instance Taken from: - http://www.electricmonk.nl/log/2011/08/14/redirect-stdout-and-stderr-to-a-logger-in-python/ + https://www.electricmonk.nl/log/2011/08/14/redirect-stdout-and-stderr-to-a-logger-in-python/ """ def __init__(self, logger, log_level=logging.INFO): self.logger = logger diff --git a/scrapy/utils/url.py b/scrapy/utils/url.py index 8eed31060..657c53815 100644 --- a/scrapy/utils/url.py +++ b/scrapy/utils/url.py @@ -47,7 +47,7 @@ def parse_url(url, encoding=None): def escape_ajax(url): """ Return the crawleable url according to: - http://code.google.com/web/ajaxcrawling/docs/getting-started.html + https://developers.google.com/webmasters/ajax-crawling/docs/getting-started >>> escape_ajax("www.example.com/ajax.html#!key=value") 'www.example.com/ajax.html?_escaped_fragment_=key%3Dvalue' diff --git a/sep/sep-001.rst b/sep/sep-001.rst index 2f0fe3500..3766f38fc 100644 --- a/sep/sep-001.rst +++ b/sep/sep-001.rst @@ -61,7 +61,7 @@ ItemForm -------- Pros: -- same API used for Items (see http://doc.scrapy.org/en/latest/topics/items.html) +- same API used for Items (see https://doc.scrapy.org/en/latest/topics/items.html) - some people consider setitem API more elegant than methods API Cons: diff --git a/sep/sep-006.rst b/sep/sep-006.rst index c0f945b66..522bba134 100644 --- a/sep/sep-006.rst +++ b/sep/sep-006.rst @@ -16,7 +16,7 @@ Motivation ========== When you use Selectors in Scrapy, your final goal is to "extract" the data that -you've selected, as the 
[http://doc.scrapy.org/en/latest/topics/selectors.html +you've selected, as the [https://doc.scrapy.org/en/latest/topics/selectors.html XPath Selectors documentation] says (bolding by me): When you’re scraping web pages, the most common task you need to perform is @@ -71,5 +71,5 @@ webpage or set of pages. References ========== - 1. XPath Selectors (http://doc.scrapy.org/topics/selectors.html) + 1. XPath Selectors (https://doc.scrapy.org/topics/selectors.html) 2. XPath and XSLT with lxml (http://codespeak.net/lxml/xpathxslt.html) diff --git a/sep/sep-013.rst b/sep/sep-013.rst index 4c11a0762..5b18b7501 100644 --- a/sep/sep-013.rst +++ b/sep/sep-013.rst @@ -44,7 +44,7 @@ Overview of changes proposed Most of the inconsistencies come from the fact that middlewares don't follow the typical -[http://twistedmatrix.com/projects/core/documentation/howto/defer.html +[https://twistedmatrix.com/projects/core/documentation/howto/defer.html deferred] callback/errback chaining logic. Twisted logic is fine and quite intuitive, and also fits middlewares very well. Due to some bad design choices the integration between middleware calls and deferred is far from optional. So diff --git a/sep/sep-017.rst b/sep/sep-017.rst index 7707a1622..86005e3c9 100644 --- a/sep/sep-017.rst +++ b/sep/sep-017.rst @@ -13,7 +13,7 @@ SEP-017: Spider Contracts The motivation for Spider Contracts is to build a lightweight mechanism for testing your spiders, and be able to run the tests quickly without having to wait for all the spider to run. It's partially based on the -[http://en.wikipedia.org/wiki/Design_by_contract Design by contract] approach +[https://en.wikipedia.org/wiki/Design_by_contract Design by contract] approach (hence its name) where you define certain conditions that spider callbacks must met, and you give example testing pages. diff --git a/sep/sep-020.rst b/sep/sep-020.rst index 49d068479..52d78097b 100644 --- a/sep/sep-020.rst +++ b/sep/sep-020.rst @@ -29,7 +29,7 @@ the rows and the further embedded ```` elements denoting the individual fields. One pattern that is particularly well suited for auto-populating an Item Loader -is the `definition list `_:: +is the `definition list `_::
diff --git a/setup.py b/setup.py index c03f0b9f7..327286f5a 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ if has_environment_marker_platform_impl_support(): setup( name='Scrapy', version=version, - url='http://scrapy.org', + url='https://scrapy.org', description='A high-level Web Crawling and Web Scraping framework', long_description=open('README.rst').read(), author='Scrapy developers', diff --git a/tests/__init__.py b/tests/__init__.py index c2e4fd2bf..55b1ecde8 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +1,7 @@ """ tests: this package contains all Scrapy unittests -see http://doc.scrapy.org/en/latest/contributing.html#running-tests +see https://doc.scrapy.org/en/latest/contributing.html#running-tests """ import os diff --git a/tests/keys/example-com.conf b/tests/keys/example-com.conf index 8aa338cd5..1f9c25e43 100644 --- a/tests/keys/example-com.conf +++ b/tests/keys/example-com.conf @@ -1,4 +1,4 @@ -# this is copied from http://stackoverflow.com/a/27931596 +# this is copied from https://stackoverflow.com/a/27931596 [ req ] default_bits = 2048 default_keyfile = server-key.pem @@ -24,7 +24,7 @@ organizationName_default = Example, LLC # Use a friendly name here because its presented to the user. The server's DNS # names are placed in Subject Alternate Names. Plus, DNS names here is deprecated -# by both IETF and CA/Browser Forums. If you place a DNS name here, then you +# by both IETF and CA/Browser Forums. If you place a DNS name here, then you # must include the DNS name in the SAN too (otherwise, Chrome and others that # strictly follow the CA/Browser Baseline Requirements will fail). commonName = Common Name (e.g. server FQDN or YOUR name) -- GitLab
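
A quick way for a reviewer to double-check that any http:// links still left in the tree can also be upgraded is a small scan script along the lines below. This is a rough sketch only, not part of the patch: it assumes the third-party `requests` package is installed, uses a HEAD request with a status below 400 as a rough "https works" signal, and the file name used in the usage note is arbitrary.

#!/usr/bin/env python
"""Rough helper for spotting http:// links whose https:// counterpart also responds.

Illustrative sketch only (not part of the patch above); it assumes the third-party
`requests` package is available and that a HEAD request answering with a status
below 400 is a good-enough signal that the https variant is usable.
"""
import re
import sys
from pathlib import Path

import requests  # assumed dependency; any HTTP client would do

# crude pattern: grab http:// up to whitespace, quotes or angle brackets
HTTP_LINK = re.compile(r"http://[^\s\"'<>]+")


def https_works(http_url, timeout=10):
    """Return True if swapping the scheme to https yields a response below 400."""
    candidate = "https://" + http_url[len("http://"):]
    try:
        resp = requests.head(candidate, allow_redirects=True, timeout=timeout)
        return resp.status_code < 400
    except requests.RequestException:
        return False


def report(root="."):
    """Walk *root* and print every http:// link that looks upgradable."""
    for path in Path(root).rglob("*"):
        if not path.is_file():
            continue
        try:
            text = path.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            continue  # skip binaries and unreadable files
        for match in HTTP_LINK.finditer(text):
            url = match.group(0).rstrip(".,);")  # drop trailing punctuation
            if https_works(url):
                print("%s: %s -> https looks fine" % (path, url))


if __name__ == "__main__":
    report(sys.argv[1] if len(sys.argv) > 1 else ".")

Run it from a checkout root, e.g. `python check_https.py docs/` (the script name is hypothetical), and treat the output as candidates for manual review rather than an automatic rewrite, since some hosts serve different content over https or only redirect part of their paths.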