Commit 3f5a1956 authored by Adrián Chaves

Merge remote-tracking branch 'upstream/master' into retry-request

name: Checks
on: [push, pull_request]
jobs:
checks:
runs-on: ubuntu-18.04
strategy:
matrix:
include:
- python-version: 3.8
env:
TOXENV: security
- python-version: 3.8
env:
TOXENV: flake8
- python-version: 3.8
env:
TOXENV: pylint
- python-version: 3.8
env:
TOXENV: typing
- python-version: 3.7 # Keep in sync with .readthedocs.yml
env:
TOXENV: docs
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Run check
env: ${{ matrix.env }}
run: |
pip install -U tox
tox
name: Publish
on: [push]
jobs:
publish:
runs-on: ubuntu-18.04
if: startsWith(github.event.ref, 'refs/tags/')
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Check Tag
id: check-release-tag
run: |
if [[ ${{ github.event.ref }} =~ ^refs/tags/[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$ ]]; then
echo ::set-output name=release_tag::true
fi
- name: Publish to PyPI
if: steps.check-release-tag.outputs.release_tag == 'true'
run: |
pip install --upgrade setuptools wheel twine
python setup.py sdist bdist_wheel
export TWINE_USERNAME=__token__
export TWINE_PASSWORD=${{ secrets.PYPI_TOKEN }}
twine upload dist/*
name: macOS
on: [push, pull_request]
jobs:
tests:
runs-on: macos-10.15
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Run tests
run: |
pip install -U tox
tox -e py
- name: Upload coverage report
run: bash <(curl -s https://codecov.io/bash)
name: Ubuntu
on: [push, pull_request]
jobs:
tests:
runs-on: ubuntu-18.04
strategy:
matrix:
include:
- python-version: 3.7
env:
TOXENV: py
- python-version: 3.8
env:
TOXENV: py
- python-version: pypy3
env:
TOXENV: pypy3
PYPY_VERSION: 3.6-v7.3.1
# pinned deps
- python-version: 3.6.12
env:
TOXENV: pinned
- python-version: 3.6.12
env:
TOXENV: asyncio-pinned
- python-version: pypy3
env:
TOXENV: pypy3-pinned
PYPY_VERSION: 3.6-v7.2.0
# extras
- python-version: 3.8
env:
TOXENV: extra-deps
- python-version: 3.8
env:
TOXENV: asyncio
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install system libraries
if: matrix.python-version == 'pypy3' || contains(matrix.env.TOXENV, 'pinned')
run: |
sudo apt-get update
sudo apt-get install libxml2-dev libxslt-dev
- name: Run tests
env: ${{ matrix.env }}
run: |
if [[ ! -z "$PYPY_VERSION" ]]; then
export PYPY_VERSION="pypy$PYPY_VERSION-linux64"
wget "https://downloads.python.org/pypy/${PYPY_VERSION}.tar.bz2"
tar -jxf ${PYPY_VERSION}.tar.bz2
$PYPY_VERSION/bin/pypy3 -m venv "$HOME/virtualenvs/$PYPY_VERSION"
source "$HOME/virtualenvs/$PYPY_VERSION/bin/activate"
fi
pip install -U tox
tox
- name: Upload coverage report
run: bash <(curl -s https://codecov.io/bash)
name: Run test suite
name: Windows
on: [push, pull_request]
jobs:
test-windows:
name: "Windows Tests"
runs-on: ${{ matrix.os }}
tests:
runs-on: windows-latest
strategy:
matrix:
os: [windows-latest]
python-version: [3.7, 3.8]
env: [TOXENV: py]
include:
- os: windows-latest
python-version: 3.6
- python-version: 3.6
env:
TOXENV: windows-pinned
- python-version: 3.7
env:
TOXENV: py
- python-version: 3.8
env:
TOXENV: py
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Run test suite
- name: Run tests
env: ${{ matrix.env }}
run: |
pip install -U tox twine wheel codecov
pip install -U tox
tox
language: python
dist: xenial
branches:
only:
- master
- /^\d\.\d+$/
- /^\d\.\d+\.\d+(rc\d+|\.dev\d+)?$/
matrix:
include:
- env: TOXENV=security
python: 3.8
- env: TOXENV=flake8
python: 3.8
- env: TOXENV=pylint
python: 3.8
- env: TOXENV=docs
python: 3.7 # Keep in sync with .readthedocs.yml
- env: TOXENV=typing
python: 3.8
- env: TOXENV=pinned
python: 3.6.1
- env: TOXENV=asyncio-pinned
python: 3.6.1
- env: TOXENV=pypy3-pinned PYPY_VERSION=3.6-v7.2.0
- env: TOXENV=py
python: 3.6
- env: TOXENV=pypy3 PYPY_VERSION=3.6-v7.3.1
- env: TOXENV=py
python: 3.7
- env: TOXENV=py PYPI_RELEASE_JOB=true
python: 3.8
dist: bionic
- env: TOXENV=extra-deps
python: 3.8
dist: bionic
- env: TOXENV=asyncio
python: 3.8
dist: bionic
install:
- |
if [[ ! -z "$PYPY_VERSION" ]]; then
export PYPY_VERSION="pypy$PYPY_VERSION-linux64"
wget "https://downloads.python.org/pypy/${PYPY_VERSION}.tar.bz2"
tar -jxf ${PYPY_VERSION}.tar.bz2
virtualenv --python="$PYPY_VERSION/bin/pypy3" "$HOME/virtualenvs/$PYPY_VERSION"
source "$HOME/virtualenvs/$PYPY_VERSION/bin/activate"
fi
- pip install -U tox twine wheel codecov
script: tox
after_success:
- codecov
notifications:
irc:
use_notice: true
skip_join: true
channels:
- irc.freenode.org#scrapy
cache:
directories:
- $HOME/.cache/pip
deploy:
provider: pypi
distributions: "sdist bdist_wheel"
user: scrapy
password:
secure: JaAKcy1AXWXDK3LXdjOtKyaVPCSFoCGCnW15g4f65E/8Fsi9ZzDfmBa4Equs3IQb/vs/if2SVrzJSr7arN7r9Z38Iv1mUXHkFAyA3Ym8mThfABBzzcUWEQhIHrCX0Tdlx9wQkkhs+PZhorlmRS4gg5s6DzPaeA2g8SCgmlRmFfA=
on:
tags: true
repo: scrapy/scrapy
condition: "$PYPI_RELEASE_JOB == true && $TRAVIS_TAG =~ ^[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$"
Scrapy was brought to life by Shane Evans while hacking a scraping framework
prototype for Mydeco (mydeco.com). It soon became maintained, extended and
improved by Insophia (insophia.com), with the initial sponsorship of Mydeco to
bootstrap the project. In mid-2011, Scrapinghub became the new official
maintainer.
bootstrap the project. In mid-2011, Scrapinghub (now Zyte) became the new
official maintainer.
Here is the list of the primary authors & contributors:
......
......@@ -55,7 +55,7 @@ further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at opensource@scrapinghub.com. All
reported by contacting the project team at opensource@zyte.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
......
......@@ -10,9 +10,17 @@ Scrapy
:target: https://pypi.python.org/pypi/Scrapy
:alt: Supported Python Versions
.. image:: https://img.shields.io/travis/scrapy/scrapy/master.svg
:target: https://travis-ci.org/scrapy/scrapy
:alt: Build Status
.. image:: https://github.com/scrapy/scrapy/workflows/Ubuntu/badge.svg
:target: https://github.com/scrapy/scrapy/actions?query=workflow%3AUbuntu
:alt: Ubuntu
.. image:: https://github.com/scrapy/scrapy/workflows/macOS/badge.svg
:target: https://github.com/scrapy/scrapy/actions?query=workflow%3AmacOS
:alt: macOS
.. image:: https://github.com/scrapy/scrapy/workflows/Windows/badge.svg
:target: https://github.com/scrapy/scrapy/actions?query=workflow%3AWindows
:alt: Windows
.. image:: https://img.shields.io/badge/wheel-yes-brightgreen.svg
:target: https://pypi.python.org/pypi/Scrapy
......@@ -34,9 +42,16 @@ Scrapy is a fast high-level web crawling and web scraping framework, used to
crawl websites and extract structured data from their pages. It can be used for
a wide range of purposes, from data mining to monitoring and automated testing.
Scrapy is maintained by Zyte_ (formerly Scrapinghub) and `many other
contributors`_.
.. _many other contributors: https://github.com/scrapy/scrapy/graphs/contributors
.. _Zyte: https://www.zyte.com/
Check the Scrapy homepage at https://scrapy.org for more information,
including a list of features.
Requirements
============
......@@ -81,7 +96,7 @@ Please note that this project is released with a Contributor Code of Conduct
(see https://github.com/scrapy/scrapy/blob/master/CODE_OF_CONDUCT.md).
By participating in this project you agree to abide by its terms.
Please report unacceptable behavior to opensource@scrapinghub.com.
Please report unacceptable behavior to opensource@zyte.com.
Companies using Scrapy
======================
......
......@@ -2,6 +2,8 @@ from pathlib import Path
import pytest
from scrapy.utils.reactor import install_reactor
from tests.keys import generate_keys
......@@ -40,6 +42,14 @@ def pytest_collection_modifyitems(session, config, items):
pass
def pytest_addoption(parser):
parser.addoption(
"--reactor",
default="default",
choices=["default", "asyncio"],
)
@pytest.fixture(scope='class')
def reactor_pytest(request):
if not request.cls:
......@@ -55,5 +65,10 @@ def only_asyncio(request, reactor_pytest):
pytest.skip('This test is only run with --reactor=asyncio')
def pytest_configure(config):
if config.getoption("--reactor") == "asyncio":
install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
# Generate localhost certificate files, needed by some tests
generate_keys()
......@@ -283,6 +283,7 @@ coverage_ignore_pyobjects = [
intersphinx_mapping = {
'attrs': ('https://www.attrs.org/en/stable/', None),
'coverage': ('https://coverage.readthedocs.io/en/stable', None),
'cryptography' : ('https://cryptography.io/en/latest/', None),
'cssselect': ('https://cssselect.readthedocs.io/en/latest', None),
'itemloaders': ('https://itemloaders.readthedocs.io/en/latest/', None),
'pytest': ('https://docs.pytest.org/en/latest', None),
......
......@@ -69,10 +69,9 @@ In case of any trouble related to these dependencies,
please refer to their respective installation instructions:
* `lxml installation`_
* `cryptography installation`_
* :doc:`cryptography installation <cryptography:installation>`
.. _lxml installation: https://lxml.de/installation.html
.. _cryptography installation: https://cryptography.io/en/latest/installation/
.. _intro-using-virtualenv:
......@@ -265,10 +264,8 @@ For details, see `Issue #2473 <https://github.com/scrapy/scrapy/issues/2473>`_.
.. _cryptography: https://cryptography.io/en/latest/
.. _pyOpenSSL: https://pypi.org/project/pyOpenSSL/
.. _setuptools: https://pypi.python.org/pypi/setuptools
.. _AUR Scrapy package: https://aur.archlinux.org/packages/scrapy/
.. _homebrew: https://brew.sh/
.. _zsh: https://www.zsh.org/
.. _Scrapinghub: https://scrapinghub.com
.. _Anaconda: https://docs.anaconda.com/anaconda/
.. _Miniconda: https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html
.. _conda-forge: https://conda-forge.org/
......@@ -2428,7 +2428,7 @@ Bug fixes
- Fix compatibility with Twisted 17+ (:issue:`2496`, :issue:`2528`).
- Fix ``scrapy.Item`` inheritance on Python 3.6 (:issue:`2511`).
- Enforce numeric values for components order in ``SPIDER_MIDDLEWARES``,
``DOWNLOADER_MIDDLEWARES``, ``EXTENIONS`` and ``SPIDER_CONTRACTS`` (:issue:`2420`).
``DOWNLOADER_MIDDLEWARES``, ``EXTENSIONS`` and ``SPIDER_CONTRACTS`` (:issue:`2420`).
Documentation
~~~~~~~~~~~~~
......
......@@ -14,7 +14,7 @@ spiders come in.
Popular choices for deploying Scrapy spiders are:
* :ref:`Scrapyd <deploy-scrapyd>` (open source)
* :ref:`Scrapy Cloud <deploy-scrapy-cloud>` (cloud-based)
* :ref:`Zyte Scrapy Cloud <deploy-scrapy-cloud>` (cloud-based)
.. _deploy-scrapyd:
......@@ -32,28 +32,28 @@ Scrapyd is maintained by some of the Scrapy developers.
.. _deploy-scrapy-cloud:
Deploying to Scrapy Cloud
=========================
Deploying to Zyte Scrapy Cloud
==============================
`Scrapy Cloud`_ is a hosted, cloud-based service by `Scrapinghub`_,
the company behind Scrapy.
`Zyte Scrapy Cloud`_ is a hosted, cloud-based service by Zyte_, the company
behind Scrapy.
Scrapy Cloud removes the need to setup and monitor servers
and provides a nice UI to manage spiders and review scraped items,
logs and stats.
Zyte Scrapy Cloud removes the need to setup and monitor servers and provides a
nice UI to manage spiders and review scraped items, logs and stats.
To deploy spiders to Scrapy Cloud you can use the `shub`_ command line tool.
Please refer to the `Scrapy Cloud documentation`_ for more information.
To deploy spiders to Zyte Scrapy Cloud you can use the `shub`_ command line
tool.
Please refer to the `Zyte Scrapy Cloud documentation`_ for more information.
Scrapy Cloud is compatible with Scrapyd and one can switch between
Zyte Scrapy Cloud is compatible with Scrapyd and one can switch between
them as needed - the configuration is read from the ``scrapy.cfg`` file
just like ``scrapyd-deploy``.
.. _Scrapyd: https://github.com/scrapy/scrapyd
.. _Deploying your project: https://scrapyd.readthedocs.io/en/latest/deploy.html
.. _Scrapy Cloud: https://scrapinghub.com/scrapy-cloud
.. _Scrapyd: https://github.com/scrapy/scrapyd
.. _scrapyd-client: https://github.com/scrapy/scrapyd-client
.. _shub: https://doc.scrapinghub.com/shub.html
.. _scrapyd-deploy documentation: https://scrapyd.readthedocs.io/en/latest/deploy.html
.. _Scrapy Cloud documentation: https://doc.scrapinghub.com/scrapy-cloud.html
.. _Scrapinghub: https://scrapinghub.com/
.. _shub: https://shub.readthedocs.io/en/latest/
.. _Zyte: https://zyte.com/
.. _Zyte Scrapy Cloud: https://www.zyte.com/scrapy-cloud/
.. _Zyte Scrapy Cloud documentation: https://docs.zyte.com/scrapy-cloud.html
......@@ -123,7 +123,7 @@ Example::
def serialize_field(self, field, name, value):
if field == 'price':
return f'$ {str(value)}'
return super(Product, self).serialize_field(field, name, value)
return super().serialize_field(field, name, value)
.. _topics-exporters-reference:
......
......@@ -101,7 +101,7 @@ instance, which can be accessed and used like this::
class MySpider(scrapy.Spider):
name = 'myspider'
start_urls = ['https://scrapinghub.com']
start_urls = ['https://scrapy.org']
def parse(self, response):
self.logger.info('Parse function called on %s', response.url)
......@@ -117,7 +117,7 @@ Python logger you want. For example::
class MySpider(scrapy.Spider):
name = 'myspider'
start_urls = ['https://scrapinghub.com']
start_urls = ['https://scrapy.org']
def parse(self, response):
logger.info('Parse function called on %s', response.url)
......
......@@ -63,7 +63,7 @@ project as example.
process = CrawlerProcess(get_project_settings())
# 'followall' is the name of one of the spiders of the project.
process.crawl('followall', domain='scrapinghub.com')
process.crawl('followall', domain='scrapy.org')
process.start() # the script will block here until the crawling is finished
There's another Scrapy utility that provides more control over the crawling
......@@ -244,7 +244,7 @@ Here are some tips to keep in mind when dealing with these kinds of sites:
super proxy that you can attach your own proxies to.
* use a highly distributed downloader that circumvents bans internally, so you
can just focus on parsing clean pages. One example of such downloaders is
`Crawlera`_
`Zyte Smart Proxy Manager`_
If you are still unable to prevent your bot getting banned, consider contacting
`commercial support`_.
......@@ -254,5 +254,5 @@ If you are still unable to prevent your bot getting banned, consider contacting
.. _ProxyMesh: https://proxymesh.com/
.. _Google cache: http://www.googleguide.com/cached_pages.html
.. _testspiders: https://github.com/scrapinghub/testspiders
.. _Crawlera: https://scrapinghub.com/crawlera
.. _scrapoxy: https://scrapoxy.io/
.. _Zyte Smart Proxy Manager: https://www.zyte.com/smart-proxy-manager/
......@@ -693,9 +693,19 @@ Response objects
:param ip_address: The IP address of the server from which the Response originated.
:type ip_address: :class:`ipaddress.IPv4Address` or :class:`ipaddress.IPv6Address`
:param protocol: The protocol that was used to download the response.
For instance: "HTTP/1.0", "HTTP/1.1"
:type protocol: :class:`str`
.. versionadded:: 2.0.0
The ``certificate`` parameter.
.. versionadded:: 2.1.0
The ``ip_address`` parameter.
.. versionadded:: VERSION
The ``protocol`` parameter.
.. attribute:: Response.url
A string containing the URL of the response.
......@@ -780,6 +790,8 @@ Response objects
.. attribute:: Response.certificate
.. versionadded:: 2.0.0
A :class:`twisted.internet.ssl.Certificate` object representing
the server's SSL certificate.
......@@ -795,6 +807,17 @@ Response objects
handler, i.e. for ``http(s)`` responses. For other handlers,
:attr:`ip_address` is always ``None``.
.. attribute:: Response.protocol
.. versionadded:: VERSION
The protocol that was used to download the response.
For instance: "HTTP/1.0", "HTTP/1.1"
This attribute is currently only populated by the HTTP download
handlers, i.e. for ``http(s)`` responses. For other handlers,
:attr:`protocol` is always ``None``.
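As a rough illustration (a sketch; the spider name and URL are placeholders), the
attribute can be read from a callback like any other response attribute::

    import scrapy

    class ProtocolLoggingSpider(scrapy.Spider):
        name = "protocol-logging-example"  # placeholder name, for illustration only
        start_urls = ["https://scrapy.org"]

        def parse(self, response):
            # "HTTP/1.0" or "HTTP/1.1" for http(s) responses, None for other handlers
            self.logger.info("Downloaded %s over %s", response.url, response.protocol)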
.. method:: Response.copy()
Returns a new Response which is a copy of this Response.
......
......@@ -464,10 +464,10 @@ effectively. If you are not much familiar with XPath yet,
you may want to take a look first at this `XPath tutorial`_.
.. note::
Some of the tips are based on `this post from ScrapingHub's blog`_.
Some of the tips are based on `this post from Zyte's blog`_.
.. _`XPath tutorial`: http://www.zvon.org/comp/r/tut-XPath_1.html
.. _`this post from ScrapingHub's blog`: https://blog.scrapinghub.com/2014/07/17/xpath-tips-from-the-web-scraping-trenches/
.. _this post from Zyte's blog: https://www.zyte.com/blog/xpath-tips-from-the-web-scraping-trenches/
.. _topics-selectors-relative-xpaths:
......
......@@ -18,7 +18,6 @@ addopts =
--ignore=docs/topics/stats.rst
--ignore=docs/topics/telnetconsole.rst
--ignore=docs/utils
twisted = 1
markers =
only_asyncio: marks tests as only enabled when --reactor=asyncio is passed
flake8-max-line-length = 119
......@@ -36,8 +35,5 @@ flake8-ignore =
scrapy/spiders/__init__.py E402 F401
# Issues pending a review:
scrapy/utils/http.py F403
scrapy/utils/markup.py F403
scrapy/utils/multipart.py F403
scrapy/utils/url.py F403 F405
tests/test_loader.py E741
......@@ -303,11 +303,14 @@ class ScrapyAgent:
proxyHost = to_unicode(proxyHost)
omitConnectTunnel = b'noconnect' in proxyParams
if omitConnectTunnel:
warnings.warn("Using HTTPS proxies in the noconnect mode is deprecated. "
"If you use Crawlera, it doesn't require this mode anymore, "
"so you should update scrapy-crawlera to 1.3.0+ "
"and remove '?noconnect' from the Crawlera URL.",
ScrapyDeprecationWarning)
warnings.warn(
"Using HTTPS proxies in the noconnect mode is deprecated. "
"If you use Zyte Smart Proxy Manager (formerly Crawlera), "
"it doesn't require this mode anymore, so you should "
"update scrapy-crawlera to 1.3.0+ and remove '?noconnect' "
"from the Zyte Smart Proxy Manager URL.",
ScrapyDeprecationWarning,
)
if scheme == b'https' and not omitConnectTunnel:
proxyAuth = request.headers.get(b'Proxy-Authorization', None)
proxyConf = (proxyHost, proxyPort, proxyAuth)
......@@ -434,6 +437,11 @@ class ScrapyAgent:
def _cb_bodydone(self, result, request, url):
headers = Headers(result["txresponse"].headers.getAllRawHeaders())
respcls = responsetypes.from_args(headers=headers, url=url, body=result["body"])
try:
version = result["txresponse"].version
protocol = f"{to_unicode(version[0])}/{version[1]}.{version[2]}"
except (AttributeError, TypeError, IndexError):
protocol = None
response = respcls(
url=url,
status=int(result["txresponse"].code),
......@@ -442,6 +450,7 @@ class ScrapyAgent:
flags=result["flags"],
certificate=result["certificate"],
ip_address=result["ip_address"],
protocol=protocol,
)
if result.get("failure"):
result["failure"].value.response = response
......
......@@ -7,7 +7,7 @@ from twisted.internet.protocol import ClientFactory
from scrapy.http import Headers
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.python import to_bytes
from scrapy.utils.python import to_bytes, to_unicode
from scrapy.responsetypes import responsetypes
......@@ -110,7 +110,7 @@ class ScrapyHTTPClientFactory(ClientFactory):
status = int(self.status)
headers = Headers(self.response_headers)
respcls = responsetypes.from_args(headers=headers, url=self._url)
return respcls(url=self._url, status=status, headers=headers, body=body)
return respcls(url=self._url, status=status, headers=headers, body=body, protocol=to_unicode(self.version))
def _set_connection_attributes(self, request):
parsed = urlparse_cached(request)
......
import os
import json
import logging
import warnings
from os.path import join, exists
from queuelib import PriorityQueue
from scrapy.utils.misc import load_object, create_instance
from scrapy.utils.job import job_dir
from scrapy.utils.deprecate import ScrapyDeprecationWarning
logger = logging.getLogger(__name__)
......@@ -56,14 +52,6 @@ class Scheduler:
dupefilter_cls = load_object(settings['DUPEFILTER_CLASS'])
dupefilter = create_instance(dupefilter_cls, settings, crawler)
pqclass = load_object(settings['SCHEDULER_PRIORITY_QUEUE'])
if pqclass is PriorityQueue:
warnings.warn("SCHEDULER_PRIORITY_QUEUE='queuelib.PriorityQueue'"
" is no longer supported because of API changes; "
"please use 'scrapy.pqueues.ScrapyPriorityQueue'",
ScrapyDeprecationWarning)
from scrapy.pqueues import ScrapyPriorityQueue
pqclass = ScrapyPriorityQueue
dqclass = load_object(settings['SCHEDULER_DISK_QUEUE'])
mqclass = load_object(settings['SCHEDULER_MEMORY_QUEUE'])
logunser = settings.getbool('SCHEDULER_DEBUG')
......
......@@ -41,86 +41,92 @@ class SpiderMiddlewareManager(MiddlewareManager):
process_spider_exception = getattr(mw, 'process_spider_exception', None)
self.methods['process_spider_exception'].appendleft(process_spider_exception)
def scrape_response(self, scrape_func, response, request, spider):
def process_spider_input(response):
for method in self.methods['process_spider_input']:
try:
result = method(response=response, spider=spider)
if result is not None:
msg = (f"Middleware {_fname(method)} must return None "
f"or raise an exception, got {type(result)}")
raise _InvalidOutput(msg)
except _InvalidOutput:
raise
except Exception:
return scrape_func(Failure(), request, spider)
return scrape_func(response, request, spider)
def _evaluate_iterable(iterable, exception_processor_index, recover_to):
def _process_spider_input(self, scrape_func, response, request, spider):
for method in self.methods['process_spider_input']:
try:
for r in iterable:
yield r
except Exception as ex:
exception_result = process_spider_exception(Failure(ex), exception_processor_index)
if isinstance(exception_result, Failure):
raise
recover_to.extend(exception_result)
def process_spider_exception(_failure, start_index=0):
exception = _failure.value
# don't handle _InvalidOutput exception
if isinstance(exception, _InvalidOutput):
return _failure
method_list = islice(self.methods['process_spider_exception'], start_index, None)
for method_index, method in enumerate(method_list, start=start_index):
if method is None:
continue
result = method(response=response, exception=exception, spider=spider)
if _isiterable(result):
# stop exception handling by handing control over to the
# process_spider_output chain if an iterable has been returned
return process_spider_output(result, method_index + 1)
elif result is None:
continue
else:
result = method(response=response, spider=spider)
if result is not None:
msg = (f"Middleware {_fname(method)} must return None "
f"or an iterable, got {type(result)}")
f"or raise an exception, got {type(result)}")
raise _InvalidOutput(msg)
except _InvalidOutput:
raise
except Exception:
return scrape_func(Failure(), request, spider)
return scrape_func(response, request, spider)
def _evaluate_iterable(self, response, spider, iterable, exception_processor_index, recover_to):
try:
for r in iterable:
yield r
except Exception as ex:
exception_result = self._process_spider_exception(response, spider, Failure(ex),
exception_processor_index)
if isinstance(exception_result, Failure):
raise
recover_to.extend(exception_result)
def _process_spider_exception(self, response, spider, _failure, start_index=0):
exception = _failure.value
# don't handle _InvalidOutput exception
if isinstance(exception, _InvalidOutput):
return _failure
method_list = islice(self.methods['process_spider_exception'], start_index, None)
for method_index, method in enumerate(method_list, start=start_index):
if method is None:
continue
result = method(response=response, exception=exception, spider=spider)
if _isiterable(result):
# stop exception handling by handing control over to the
# process_spider_output chain if an iterable has been returned
return self._process_spider_output(response, spider, result, method_index + 1)
elif result is None:
continue
else:
msg = (f"Middleware {_fname(method)} must return None "
f"or an iterable, got {type(result)}")
raise _InvalidOutput(msg)
return _failure
def _process_spider_output(self, response, spider, result, start_index=0):
# items in this iterable do not need to go through the process_spider_output
# chain, they went through it already from the process_spider_exception method
recovered = MutableChain()
method_list = islice(self.methods['process_spider_output'], start_index, None)
for method_index, method in enumerate(method_list, start=start_index):
if method is None:
continue
try:
# might fail directly if the output value is not a generator
result = method(response=response, result=result, spider=spider)
except Exception as ex:
exception_result = self._process_spider_exception(response, spider, Failure(ex), method_index + 1)
if isinstance(exception_result, Failure):
raise
return exception_result
if _isiterable(result):
result = self._evaluate_iterable(response, spider, result, method_index + 1, recovered)
else:
msg = (f"Middleware {_fname(method)} must return an "
f"iterable, got {type(result)}")
raise _InvalidOutput(msg)
def process_spider_output(result, start_index=0):
# items in this iterable do not need to go through the process_spider_output
# chain, they went through it already from the process_spider_exception method
recovered = MutableChain()
method_list = islice(self.methods['process_spider_output'], start_index, None)
for method_index, method in enumerate(method_list, start=start_index):
if method is None:
continue
try:
# might fail directly if the output value is not a generator
result = method(response=response, result=result, spider=spider)
except Exception as ex:
exception_result = process_spider_exception(Failure(ex), method_index + 1)
if isinstance(exception_result, Failure):
raise
return exception_result
if _isiterable(result):
result = _evaluate_iterable(result, method_index + 1, recovered)
else:
msg = (f"Middleware {_fname(method)} must return an "
f"iterable, got {type(result)}")
raise _InvalidOutput(msg)
return MutableChain(result, recovered)
return MutableChain(result, recovered)
def _process_callback_output(self, response, spider, result):
recovered = MutableChain()
result = self._evaluate_iterable(response, spider, result, 0, recovered)
return MutableChain(self._process_spider_output(response, spider, result), recovered)
def scrape_response(self, scrape_func, response, request, spider):
def process_callback_output(result):
recovered = MutableChain()
result = _evaluate_iterable(result, 0, recovered)
return MutableChain(process_spider_output(result), recovered)
return self._process_callback_output(response, spider, result)
def process_spider_exception(_failure):
return self._process_spider_exception(response, spider, _failure)
dfd = mustbe_deferred(process_spider_input, response)
dfd = mustbe_deferred(self._process_spider_input, scrape_func, response, request, spider)
dfd.addCallbacks(callback=process_callback_output, errback=process_spider_exception)
return dfd
......
......@@ -160,7 +160,7 @@ def _select_value(ele, n, v):
multiple = ele.multiple
if v is None and not multiple:
# Match browser behaviour on simple select tag without options selected
# And for select tags wihout options
# And for select tags without options
o = ele.value_options
return (n, o[0]) if o else (None, None)
elif v is not None and multiple:
......
......@@ -17,8 +17,18 @@ from scrapy.utils.trackref import object_ref
class Response(object_ref):
def __init__(self, url, status=200, headers=None, body=b'', flags=None,
request=None, certificate=None, ip_address=None):
def __init__(
self,
url,
status=200,
headers=None,
body=b"",
flags=None,
request=None,
certificate=None,
ip_address=None,
protocol=None,
):
self.headers = Headers(headers or {})
self.status = int(status)
self._set_body(body)
......@@ -27,6 +37,7 @@ class Response(object_ref):
self.flags = [] if flags is None else list(flags)
self.certificate = certificate
self.ip_address = ip_address
self.protocol = protocol
@property
def cb_kwargs(self):
......@@ -89,8 +100,9 @@ class Response(object_ref):
"""Create a new Response with the same attributes except for those
given new values.
"""
for x in ['url', 'status', 'headers', 'body',
'request', 'flags', 'certificate', 'ip_address']:
for x in [
"url", "status", "headers", "body", "request", "flags", "certificate", "ip_address", "protocol",
]:
kwargs.setdefault(x, getattr(self, x))
cls = kwargs.pop('cls', self.__class__)
return cls(*args, **kwargs)
......
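A minimal sketch of how the attribute added above behaves from user code (illustrative
values only; ``protocol`` is normally filled in by the download handler): it is accepted
by the constructor, stored on the instance, and carried over by ``replace()`` unless
explicitly overridden::

    from scrapy.http import Response

    response = Response(
        url="https://example.com",   # placeholder URL
        status=200,
        protocol="HTTP/1.1",         # normally set by the HTTP download handler
    )

    copy = response.replace(status=404)
    assert copy.protocol == "HTTP/1.1"                        # preserved by replace()
    assert response.replace(protocol=None).protocol is None   # unless overridden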
......@@ -86,7 +86,7 @@ class MediaPipeline:
info = self.spiderinfo
requests = arg_to_iter(self.get_media_requests(item, info))
dlist = [self._process_request(r, info, item) for r in requests]
dfd = DeferredList(dlist, consumeErrors=1)
dfd = DeferredList(dlist, consumeErrors=True)
return dfd.addCallback(self.item_completed, item, info)
def _process_request(self, request, info, item):
......
async def collect_asyncgen(result):
results = []
async for x in result:
results.append(x)
return results
......@@ -105,7 +105,7 @@ def process_parallel(callbacks, input, *a, **kw):
callbacks
"""
dfds = [defer.succeed(input).addCallback(x, *a, **kw) for x in callbacks]
d = defer.DeferredList(dfds, fireOnOneErrback=1, consumeErrors=1)
d = defer.DeferredList(dfds, fireOnOneErrback=True, consumeErrors=True)
d.addCallbacks(lambda r: [x[1] for x in r], lambda f: f.value.subFailure)
return d
......
import struct
from gzip import GzipFile
from io import BytesIO
import re
import struct
from scrapy.utils.decorators import deprecated
......@@ -42,17 +41,5 @@ def gunzip(data):
return b''.join(output_list)
_is_gzipped = re.compile(br'^application/(x-)?gzip\b', re.I).search
_is_octetstream = re.compile(br'^(application|binary)/octet-stream\b', re.I).search
@deprecated
def is_gzipped(response):
"""Return True if the response is gzipped, or False otherwise"""
ctype = response.headers.get('Content-Type', b'')
cenc = response.headers.get('Content-Encoding', b'').lower()
return _is_gzipped(ctype) or _is_octetstream(ctype) and cenc in (b'gzip', b'x-gzip')
def gzip_magic_number(response):
return response.body[:3] == b'\x1f\x8b\x08'
"""
Transitional module for moving to the w3lib library.
For new code, always import from w3lib.http instead of this module
"""
import warnings
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.decorators import deprecated
from w3lib.http import * # noqa: F401
warnings.warn("Module `scrapy.utils.http` is deprecated, "
"Please import from `w3lib.http` instead.",
ScrapyDeprecationWarning, stacklevel=2)
@deprecated
def decode_chunked_transfer(chunked_body):
"""Parsed body received with chunked transfer encoding, and return the
decoded body.
For more info see:
https://en.wikipedia.org/wiki/Chunked_transfer_encoding
"""
body, h, t = '', '', chunked_body
while t:
h, t = t.split('\r\n', 1)
if h == '0':
break
size = int(h, 16)
body += t[:size]
t = t[size + 2:]
return body
"""
Transitional module for moving to the w3lib library.
For new code, always import from w3lib.html instead of this module
"""
import warnings
from scrapy.exceptions import ScrapyDeprecationWarning
from w3lib.html import * # noqa: F401
warnings.warn("Module `scrapy.utils.markup` is deprecated. "
"Please import from `w3lib.html` instead.",
ScrapyDeprecationWarning, stacklevel=2)
"""
Transitional module for moving to the w3lib library.
For new code, always import from w3lib.form instead of this module
"""
import warnings
from scrapy.exceptions import ScrapyDeprecationWarning
from w3lib.form import * # noqa: F401
warnings.warn("Module `scrapy.utils.multipart` is deprecated. "
"If you're using `encode_multipart` function, please use "
"`urllib3.filepost.encode_multipart_formdata` instead",
ScrapyDeprecationWarning, stacklevel=2)
import os
import pickle
import warnings
from importlib import import_module
......@@ -68,18 +67,10 @@ def get_project_settings():
if settings_module_path:
settings.setmodule(settings_module_path, priority='project')
pickled_settings = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
if pickled_settings:
warnings.warn("Use of environment variable "
"'SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE' "
"is deprecated.", ScrapyDeprecationWarning)
settings.setdict(pickle.loads(pickled_settings), priority='project')
scrapy_envvars = {k[7:]: v for k, v in os.environ.items() if
k.startswith('SCRAPY_')}
valid_envvars = {
'CHECK',
'PICKLED_SETTINGS_TO_OVERRIDE',
'PROJECT',
'PYTHON_SHELL',
'SETTINGS_MODULE',
......
"""
Helpers using Python 3.6+ syntax (ignore SyntaxError on import).
"""
import warnings
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.asyncgen import collect_asyncgen # noqa: F401
async def collect_asyncgen(result):
results = []
async for x in result:
results.append(x)
return results
warnings.warn(
"Module `scrapy.utils.py36` is deprecated, please import from `scrapy.utils.asyncgen` instead.",
category=ScrapyDeprecationWarning,
stacklevel=2,
)
......@@ -4,17 +4,14 @@ import logging
from scrapy.spiders import Spider
from scrapy.utils.defer import deferred_from_coro
from scrapy.utils.misc import arg_to_iter
try:
from scrapy.utils.py36 import collect_asyncgen
except SyntaxError:
collect_asyncgen = None
from scrapy.utils.asyncgen import collect_asyncgen
logger = logging.getLogger(__name__)
def iterate_spider_output(result):
if collect_asyncgen and hasattr(inspect, 'isasyncgen') and inspect.isasyncgen(result):
if inspect.isasyncgen(result):
d = deferred_from_coro(collect_asyncgen(result))
d.addCallback(iterate_spider_output)
return d
......
......@@ -55,9 +55,6 @@ ignore_errors = True
[mypy-scrapy.utils.response]
ignore_errors = True
[mypy-scrapy.utils.spider]
ignore_errors = True
[mypy-scrapy.utils.trackref]
ignore_errors = True
......
......@@ -24,7 +24,6 @@ install_requires = [
'cssselect>=0.9.1',
'itemloaders>=1.0.1',
'parsel>=1.5.0',
'PyDispatcher>=2.0.5',
'pyOpenSSL>=16.2.0',
'queuelib>=1.4.2',
'service_identity>=16.0.0',
......@@ -34,11 +33,12 @@ install_requires = [
'itemadapter>=0.1.0',
]
extras_require = {}
cpython_dependencies = [
'lxml>=3.5.0',
'PyDispatcher>=2.0.5',
]
if has_environment_marker_platform_impl_support():
extras_require[':platform_python_implementation == "CPython"'] = [
'lxml>=3.5.0',
]
extras_require[':platform_python_implementation == "CPython"'] = cpython_dependencies
extras_require[':platform_python_implementation == "PyPy"'] = [
# Earlier lxml versions are affected by
# https://foss.heptapod.net/pypy/pypy/-/issues/2498,
......@@ -49,14 +49,14 @@ if has_environment_marker_platform_impl_support():
'PyPyDispatcher>=2.1.0',
]
else:
install_requires.append('lxml>=3.5.0')
install_requires.extend(cpython_dependencies)
setup(
name='Scrapy',
version=version,
url='https://scrapy.org',
project_urls = {
project_urls={
'Documentation': 'https://docs.scrapy.org/',
'Source': 'https://github.com/scrapy/scrapy',
'Tracker': 'https://github.com/scrapy/scrapy/issues',
......
# Tests requirements
attrs
dataclasses; python_version == '3.6'
mitmproxy; python_version >= '3.7'
mitmproxy >= 4.0.4, < 5; python_version >= '3.6' and python_version < '3.7'
pyftpdlib
# https://github.com/pytest-dev/pytest-twisted/issues/93
pytest != 5.4, != 5.4.1
pytest
pytest-cov
pytest-twisted >= 1.11
pytest-xdist
sybil >= 1.3.0 # https://github.com/cjw296/sybil/issues/20#issuecomment-605433422
testfixtures
uvloop; platform_system != "Windows"
uvloop < 0.15.0; platform_system != "Windows" and python_version == '3.6'
uvloop; platform_system != "Windows" and python_version > '3.6'
# optional for shell wrapper tests
bpython
brotlipy # optional for HTTP compress downloader middleware tests
zstandard # optional for HTTP compress downloader middleware tests
zstandard; implementation_name != 'pypy' # optional for HTTP compress downloader middleware tests
ipython
pywin32; sys_platform == "win32"
import os
import re
from configparser import ConfigParser
from importlib import import_module
from twisted import version as twisted_version
from twisted.trial import unittest
class ScrapyUtilsTest(unittest.TestCase):
def test_required_openssl_version(self):
try:
module = import_module('OpenSSL')
......@@ -13,6 +19,32 @@ class ScrapyUtilsTest(unittest.TestCase):
installed_version = [int(x) for x in module.__version__.split('.')[:2]]
assert installed_version >= [0, 6], "OpenSSL >= 0.6 required"
def test_pinned_twisted_version(self):
"""When running tests within a Tox environment with pinned
dependencies, make sure that the version of Twisted is the pinned
version.
See https://github.com/scrapy/scrapy/pull/4814#issuecomment-706230011
"""
if not os.environ.get('_SCRAPY_PINNED', None):
self.skipTest('Not in a pinned environment')
tox_config_file_path = os.path.join(
os.path.dirname(__file__),
'..',
'tox.ini',
)
config_parser = ConfigParser()
config_parser.read(tox_config_file_path)
pattern = r'Twisted==([\d.]+)'
match = re.search(pattern, config_parser['pinned']['deps'])
pinned_twisted_version_string = match[1]
self.assertEqual(
twisted_version.short(),
pinned_twisted_version_string
)
if __name__ == "__main__":
unittest.main()
......@@ -115,6 +115,7 @@ class FileTestCase(unittest.TestCase):
self.assertEqual(response.url, request.url)
self.assertEqual(response.status, 200)
self.assertEqual(response.body, b'0123456789')
self.assertEqual(response.protocol, None)
request = Request(path_to_file_uri(self.tmpname + '^'))
assert request.url.upper().endswith('%5E')
......@@ -360,6 +361,13 @@ class Http10TestCase(HttpTestCase):
"""HTTP 1.0 test case"""
download_handler_cls = HTTP10DownloadHandler
def test_protocol(self):
request = Request(self.getURL("host"), method="GET")
d = self.download_request(request, Spider("foo"))
d.addCallback(lambda r: r.protocol)
d.addCallback(self.assertEqual, "HTTP/1.0")
return d
class Https10TestCase(Http10TestCase):
scheme = 'https'
......@@ -489,6 +497,13 @@ class Http11TestCase(HttpTestCase):
def test_download_broken_chunked_content_allow_data_loss_via_setting(self):
return self.test_download_broken_content_allow_data_loss_via_setting('broken-chunked')
def test_protocol(self):
request = Request(self.getURL("host"), method="GET")
d = self.download_request(request, Spider("foo"))
d.addCallback(lambda r: r.protocol)
d.addCallback(self.assertEqual, "HTTP/1.1")
return d
class Https11TestCase(Http11TestCase):
scheme = 'https'
......@@ -962,6 +977,7 @@ class BaseFTPTestCase(unittest.TestCase):
self.assertEqual(r.status, 200)
self.assertEqual(r.body, b'I have the power!')
self.assertEqual(r.headers, {b'Local Filename': [b''], b'Size': [b'17']})
self.assertIsNone(r.protocol)
return self._add_test_callbacks(d, _test)
def test_ftp_download_path_with_spaces(self):
......@@ -1120,3 +1136,10 @@ class DataURITestCase(unittest.TestCase):
request = Request('data:text/plain;base64,SGVsbG8sIHdvcmxkLg%3D%3D')
return self.download_request(request, self.spider).addCallback(_test)
def test_protocol(self):
def _test(response):
self.assertIsNone(response.protocol)
request = Request("data:,")
return self.download_request(request, self.spider).addCallback(_test)
import asyncio
from unittest import mock
from unittest import mock, SkipTest
from pytest import mark
from twisted import version as twisted_version
from twisted.internet import defer
from twisted.internet.defer import Deferred
from twisted.trial.unittest import TestCase
from twisted.python.failure import Failure
from twisted.python.versions import Version
from scrapy.http import Request, Response
from scrapy.spiders import Spider
......@@ -211,10 +213,21 @@ class MiddlewareUsingDeferreds(ManagerTestCase):
self.assertFalse(download_func.called)
@mark.usefixtures('reactor_pytest')
class MiddlewareUsingCoro(ManagerTestCase):
"""Middlewares using asyncio coroutines should work"""
def test_asyncdef(self):
if (
self.reactor_pytest == 'asyncio'
and twisted_version < Version('twisted', 18, 4, 0)
):
raise SkipTest(
'Due to https://twistedmatrix.com/trac/ticket/9390, this test '
'hangs when using AsyncIO and Twisted versions lower than '
'18.4.0'
)
resp = Response('http://example.com/index.html')
class CoroMiddleware:
......@@ -235,6 +248,12 @@ class MiddlewareUsingCoro(ManagerTestCase):
@mark.only_asyncio()
def test_asyncdef_asyncio(self):
if twisted_version < Version('twisted', 18, 4, 0):
raise SkipTest(
'Due to https://twistedmatrix.com/trac/ticket/9390, this test '
'hangs when using Twisted versions lower than 18.4.0'
)
resp = Response('http://example.com/index.html')
class CoroMiddleware:
......
import json
import os
import platform
import re
import sys
from subprocess import Popen, PIPE
from urllib.parse import urlsplit, urlunsplit
from unittest import skipIf
from testfixtures import LogCapture
from twisted.internet import defer
from twisted.trial.unittest import TestCase
......@@ -57,13 +54,14 @@ def _wrong_credentials(proxy_url):
return urlunsplit(bad_auth_proxy)
@skipIf("pypy" in sys.executable,
"mitmproxy does not support PyPy")
@skipIf(platform.system() == 'Windows' and sys.version_info < (3, 7),
"mitmproxy does not support Windows when running Python < 3.7")
class ProxyConnectTestCase(TestCase):
def setUp(self):
try:
import mitmproxy # noqa: F401
except ImportError:
self.skipTest('mitmproxy is not installed')
self.mockserver = MockServer()
self.mockserver.__enter__()
self._oldenv = os.environ.copy()
......
from pytest import mark
from twisted.trial import unittest
from twisted.internet import reactor, defer
from twisted.python.failure import Failure
from scrapy.utils.defer import (
deferred_f_from_coro_f,
iter_errback,
mustbe_deferred,
process_chain,
......@@ -117,3 +119,18 @@ class IterErrbackTest(unittest.TestCase):
self.assertEqual(out, [0, 1, 2, 3, 4])
self.assertEqual(len(errors), 1)
self.assertIsInstance(errors[0].value, ZeroDivisionError)
class AsyncDefTestsuiteTest(unittest.TestCase):
@deferred_f_from_coro_f
async def test_deferred_f_from_coro_f(self):
pass
@deferred_f_from_coro_f
async def test_deferred_f_from_coro_f_generator(self):
yield
@mark.xfail(reason="Checks that the test is actually executed", strict=True)
@deferred_f_from_coro_f
async def test_deferred_f_from_coro_f_xfail(self):
raise Exception("This is expected to be raised")
......@@ -3,10 +3,11 @@ from os.path import join
from w3lib.encoding import html_to_unicode
from scrapy.utils.gz import gunzip, is_gzipped
from scrapy.http import Response, Headers
from scrapy.utils.gz import gunzip, gzip_magic_number
from scrapy.http import Response
from tests import tests_datadir
SAMPLEDIR = join(tests_datadir, 'compressed')
......@@ -14,8 +15,12 @@ class GunzipTest(unittest.TestCase):
def test_gunzip_basic(self):
with open(join(SAMPLEDIR, 'feed-sample1.xml.gz'), 'rb') as f:
text = gunzip(f.read())
self.assertEqual(len(text), 9950)
r1 = Response("http://www.example.com", body=f.read())
self.assertTrue(gzip_magic_number(r1))
r2 = Response("http://www.example.com", body=gunzip(r1.body))
self.assertFalse(gzip_magic_number(r2))
self.assertEqual(len(r2.body), 9950)
def test_gunzip_truncated(self):
with open(join(SAMPLEDIR, 'truncated-crc-error.gz'), 'rb') as f:
......@@ -28,46 +33,16 @@ class GunzipTest(unittest.TestCase):
def test_gunzip_truncated_short(self):
with open(join(SAMPLEDIR, 'truncated-crc-error-short.gz'), 'rb') as f:
text = gunzip(f.read())
assert text.endswith(b'</html>')
def test_is_x_gzipped_right(self):
hdrs = Headers({"Content-Type": "application/x-gzip"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertTrue(is_gzipped(r1))
r1 = Response("http://www.example.com", body=f.read())
self.assertTrue(gzip_magic_number(r1))
def test_is_gzipped_right(self):
hdrs = Headers({"Content-Type": "application/gzip"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertTrue(is_gzipped(r1))
def test_is_gzipped_not_quite(self):
hdrs = Headers({"Content-Type": "application/gzippppp"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertFalse(is_gzipped(r1))
def test_is_gzipped_case_insensitive(self):
hdrs = Headers({"Content-Type": "Application/X-Gzip"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertTrue(is_gzipped(r1))
hdrs = Headers({"Content-Type": "application/X-GZIP ; charset=utf-8"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertTrue(is_gzipped(r1))
r2 = Response("http://www.example.com", body=gunzip(r1.body))
assert r2.body.endswith(b'</html>')
self.assertFalse(gzip_magic_number(r2))
def test_is_gzipped_empty(self):
r1 = Response("http://www.example.com")
self.assertFalse(is_gzipped(r1))
def test_is_gzipped_wrong(self):
hdrs = Headers({"Content-Type": "application/javascript"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertFalse(is_gzipped(r1))
def test_is_gzipped_with_charset(self):
hdrs = Headers({"Content-Type": "application/x-gzip;charset=utf-8"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertTrue(is_gzipped(r1))
self.assertFalse(gzip_magic_number(r1))
def test_gunzip_illegal_eof(self):
with open(join(SAMPLEDIR, 'unexpected-eof.gz'), 'rb') as f:
......
import unittest
from scrapy.utils.http import decode_chunked_transfer
class ChunkedTest(unittest.TestCase):
def test_decode_chunked_transfer(self):
"""Example taken from: http://en.wikipedia.org/wiki/Chunked_transfer_encoding"""
chunked_body = "25\r\n" + "This is the data in the first chunk\r\n\r\n"
chunked_body += "1C\r\n" + "and this is the second one\r\n\r\n"
chunked_body += "3\r\n" + "con\r\n"
chunked_body += "8\r\n" + "sequence\r\n"
chunked_body += "0\r\n\r\n"
body = decode_chunked_transfer(chunked_body)
self.assertEqual(
body,
"This is the data in the first chunk\r\nand this is the second one\r\nconsequence"
)
import asyncio
from unittest import SkipTest
from pydispatch import dispatcher
from pytest import mark
from testfixtures import LogCapture
from twisted.trial import unittest
from twisted.python.failure import Failure
from twisted import version as twisted_version
from twisted.internet import defer, reactor
from pydispatch import dispatcher
from twisted.python.failure import Failure
from twisted.python.versions import Version
from twisted.trial import unittest
from scrapy.utils.signal import send_catch_log, send_catch_log_deferred
from scrapy.utils.test import get_from_asyncio_queue
......@@ -68,6 +71,7 @@ class SendCatchLogDeferredTest2(SendCatchLogDeferredTest):
return d
@mark.usefixtures('reactor_pytest')
class SendCatchLogDeferredAsyncDefTest(SendCatchLogDeferredTest):
async def ok_handler(self, arg, handlers_called):
......@@ -76,6 +80,19 @@ class SendCatchLogDeferredAsyncDefTest(SendCatchLogDeferredTest):
await defer.succeed(42)
return "OK"
def test_send_catch_log(self):
if (
self.reactor_pytest == 'asyncio'
and twisted_version < Version('twisted', 18, 4, 0)
):
raise SkipTest(
'Due to https://twistedmatrix.com/trac/ticket/9390, this test '
'fails due to a timeout when using AsyncIO and Twisted '
'versions lower than 18.4.0'
)
return super().test_send_catch_log()
@mark.only_asyncio()
class SendCatchLogDeferredAsyncioTest(SendCatchLogDeferredTest):
......@@ -86,6 +103,16 @@ class SendCatchLogDeferredAsyncioTest(SendCatchLogDeferredTest):
await asyncio.sleep(0.2)
return await get_from_asyncio_queue("OK")
def test_send_catch_log(self):
if twisted_version < Version('twisted', 18, 4, 0):
raise SkipTest(
'Due to https://twistedmatrix.com/trac/ticket/9390, this test '
'fails due to a timeout when using Twisted versions lower '
'than 18.4.0'
)
return super().test_send_catch_log()
class SendCatchLogTest2(unittest.TestCase):
......
......@@ -4,7 +4,10 @@ Tests borrowed from the twisted.web.client tests.
"""
import os
import shutil
import sys
from pkg_resources import parse_version
import cryptography
import OpenSSL.SSL
from twisted.trial import unittest
from twisted.web import server, static, util, resource
......@@ -414,6 +417,8 @@ class WebClientCustomCiphersSSLTestCase(WebClientSSLTestCase):
).addCallback(self.assertEqual, to_bytes(s))
def testPayloadDisabledCipher(self):
if sys.implementation.name == "pypy" and parse_version(cryptography.__version__) <= parse_version("2.3.1"):
self.skipTest("This does work in PyPy with cryptography<=2.3.1")
s = "0123456789" * 10
settings = Settings({'DOWNLOADER_CLIENT_TLS_CIPHERS': 'ECDHE-RSA-AES256-GCM-SHA384'})
client_context_factory = create_instance(ScrapyClientContextFactory, settings=settings, crawler=None)
......
......@@ -11,6 +11,10 @@ minversion = 1.7.0
deps =
-ctests/constraints.txt
-rtests/requirements-py3.txt
# mitmproxy does not support PyPy
# mitmproxy does not support Windows when running Python < 3.7
mitmproxy >= 4.0.4; python_version >= '3.7' and implementation_name != 'pypy'
mitmproxy >= 4.0.4, < 5; python_version >= '3.6' and python_version < '3.7' and platform_system != 'Windows' and implementation_name != 'pypy'
# Extras
botocore>=1.4.87
Pillow>=4.0.0
......@@ -20,8 +24,10 @@ passenv =
AWS_SECRET_ACCESS_KEY
GCS_TEST_FILE_URI
GCS_PROJECT_ID
#allow tox virtualenv to upgrade pip/wheel/setuptools
download = true
commands =
py.test --cov=scrapy --cov-report= {posargs:--durations=10 docs scrapy tests}
py.test --cov=scrapy --cov-report=xml --cov-report= {posargs:--durations=10 docs scrapy tests}
[testenv:typing]
basepython = python3
......@@ -66,7 +72,6 @@ deps =
itemadapter==0.1.0
parsel==1.5.0
Protego==0.1.15
PyDispatcher==2.0.5
pyOpenSSL==16.2.0
queuelib==1.4.2
service_identity==16.0.0
......@@ -74,15 +79,24 @@ deps =
w3lib==1.17.0
zope.interface==4.1.3
-rtests/requirements-py3.txt
# mitmproxy 4.0.4+ requires upgrading some of the pinned dependencies
# above, hence we do not install it in pinned environments at the moment
# Extras
botocore==1.4.87
google-cloud-storage==1.29.0
Pillow==4.0.0
setenv =
_SCRAPY_PINNED=true
[testenv:pinned]
deps =
{[pinned]deps}
lxml==3.5.0
PyDispatcher==2.0.5
setenv =
{[pinned]setenv}
[testenv:windows-pinned]
basepython = python3
......@@ -91,6 +105,9 @@ deps =
# First lxml version that includes a Windows wheel for Python 3.6, so we do
# not need to build lxml from sources in a CI Windows job:
lxml==3.8.0
PyDispatcher==2.0.5
setenv =
{[pinned]setenv}
[testenv:extra-deps]
deps =
......@@ -103,8 +120,10 @@ commands =
{[testenv]commands} --reactor=asyncio
[testenv:asyncio-pinned]
commands = {[testenv:asyncio]commands}
deps = {[testenv:pinned]deps}
commands = {[testenv:asyncio]commands}
setenv =
{[pinned]setenv}
[testenv:pypy3]
basepython = pypy3
......@@ -113,11 +132,13 @@ commands =
[testenv:pypy3-pinned]
basepython = {[testenv:pypy3]basepython}
commands = {[testenv:pypy3]commands}
deps =
{[pinned]deps}
lxml==4.0.0
PyPyDispatcher==2.1.0
commands = {[testenv:pypy3]commands}
setenv =
{[pinned]setenv}
[docs]
changedir = docs
......