diff --git a/pytest.ini b/pytest.ini
index 998633d541cab44c0f0574cc161fab2ebb5b0d5d..1570a3a75dd70248396470ec0458e9e3bea1e951 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -50,26 +50,26 @@ flake8-ignore =
     scrapy/core/engine.py E501 E128
     scrapy/core/scheduler.py E501
     scrapy/core/scraper.py E501 E128
-    scrapy/core/spidermw.py E501 E126
+    scrapy/core/spidermw.py E501
     scrapy/core/downloader/__init__.py E501
-    scrapy/core/downloader/contextfactory.py E501 E128 E126
+    scrapy/core/downloader/contextfactory.py E501 E128
     scrapy/core/downloader/middleware.py E501
     scrapy/core/downloader/tls.py E501
-    scrapy/core/downloader/webclient.py E501 E128 E126
+    scrapy/core/downloader/webclient.py E501 E128
     scrapy/core/downloader/handlers/__init__.py E501
     scrapy/core/downloader/handlers/ftp.py E501 E128
     scrapy/core/downloader/handlers/http10.py E501
     scrapy/core/downloader/handlers/http11.py E501
-    scrapy/core/downloader/handlers/s3.py E501 E128 E126
+    scrapy/core/downloader/handlers/s3.py E501 E128
     # scrapy/downloadermiddlewares
     scrapy/downloadermiddlewares/ajaxcrawl.py E501
     scrapy/downloadermiddlewares/decompression.py E501
     scrapy/downloadermiddlewares/defaultheaders.py E501
-    scrapy/downloadermiddlewares/httpcache.py E501 E126
+    scrapy/downloadermiddlewares/httpcache.py E501
     scrapy/downloadermiddlewares/httpcompression.py E501 E128
     scrapy/downloadermiddlewares/httpproxy.py E501
     scrapy/downloadermiddlewares/redirect.py E501
-    scrapy/downloadermiddlewares/retry.py E501 E126
+    scrapy/downloadermiddlewares/retry.py E501
     scrapy/downloadermiddlewares/robotstxt.py E501
     scrapy/downloadermiddlewares/stats.py E501
     # scrapy/extensions
@@ -164,12 +164,12 @@ flake8-ignore =
     scrapy/robotstxt.py E501
     scrapy/shell.py E501
     scrapy/signalmanager.py E501
-    scrapy/spiderloader.py F841 E501 E126
+    scrapy/spiderloader.py F841 E501
     scrapy/squeues.py E128
     scrapy/statscollectors.py E501
     # tests
     tests/__init__.py E402 E501
-    tests/mockserver.py E501 E126
+    tests/mockserver.py E501
     tests/pipelines.py F841
     tests/spiders.py E501
     tests/test_closespider.py E501
@@ -181,18 +181,18 @@ flake8-ignore =
     tests/test_crawl.py E501 E741
     tests/test_crawler.py F841 E501
     tests/test_dependencies.py F841 E501
-    tests/test_downloader_handlers.py E128 E501 E126
+    tests/test_downloader_handlers.py E128 E501
     tests/test_downloadermiddleware.py E501
     tests/test_downloadermiddleware_ajaxcrawlable.py E501
-    tests/test_downloadermiddleware_cookies.py E741 E501 E128 E126
+    tests/test_downloadermiddleware_cookies.py E741 E501 E128
     tests/test_downloadermiddleware_defaultheaders.py E501
     tests/test_downloadermiddleware_downloadtimeout.py E501
     tests/test_downloadermiddleware_httpcache.py E501
-    tests/test_downloadermiddleware_httpcompression.py E501 E126
+    tests/test_downloadermiddleware_httpcompression.py E501
     tests/test_downloadermiddleware_decompression.py E501
     tests/test_downloadermiddleware_httpproxy.py E501 E128
     tests/test_downloadermiddleware_redirect.py E501 E128
-    tests/test_downloadermiddleware_retry.py E501 E128 E126
+    tests/test_downloadermiddleware_retry.py E501 E128
     tests/test_downloadermiddleware_robotstxt.py E501
     tests/test_downloadermiddleware_stats.py E501
     tests/test_dupefilters.py E501 E741 E128
@@ -202,7 +202,7 @@ flake8-ignore =
     tests/test_feedexport.py E501 F841
     tests/test_http_cookies.py E501
     tests/test_http_headers.py E501
-    tests/test_http_request.py E402 E501 E128 E128 E126
+    tests/test_http_request.py E402 E501 E128 E128
     tests/test_http_response.py E501 E128
     tests/test_item.py E128 F841
     tests/test_link.py E501
@@ -211,7 +211,7 @@ flake8-ignore =
     tests/test_logformatter.py E128 E501
     tests/test_mail.py E128 E501
     tests/test_middleware.py E501 E128
-    tests/test_pipeline_crawl.py E501 E128 E126
+    tests/test_pipeline_crawl.py E501 E128
     tests/test_pipeline_files.py E501
     tests/test_pipeline_images.py F841 E501
     tests/test_pipeline_media.py E501 E741 E128
@@ -219,7 +219,7 @@ flake8-ignore =
     tests/test_request_cb_kwargs.py E501
     tests/test_responsetypes.py E501
     tests/test_robotstxt_interface.py E501 E501
-    tests/test_scheduler.py E501 E126
+    tests/test_scheduler.py E501
     tests/test_selector.py E501
     tests/test_spider.py E501
     tests/test_spidermiddleware.py E501
@@ -243,8 +243,8 @@ flake8-ignore =
     tests/test_utils_response.py E501
     tests/test_utils_signal.py E741 F841
     tests/test_utils_sitemap.py E128 E501
-    tests/test_utils_url.py E501 E501 E126
-    tests/test_webclient.py E501 E128 E402 E126
+    tests/test_utils_url.py E501 E501
+    tests/test_webclient.py E501 E128 E402
     tests/test_cmdline/__init__.py E501
     tests/test_settings/__init__.py E501 E128
     tests/test_spiderloader/__init__.py E128 E501
diff --git a/scrapy/core/downloader/contextfactory.py b/scrapy/core/downloader/contextfactory.py
index 6e023ebcc99bb145d86f7b6fbebf247bac82f664..ab73e12c844c355b23ce38a8ba85a282672bcb57 100644
--- a/scrapy/core/downloader/contextfactory.py
+++ b/scrapy/core/downloader/contextfactory.py
@@ -86,8 +86,8 @@ class BrowserLikeContextFactory(ScrapyClientContextFactory):
         #
         # This means that a website like https://www.cacert.org will be rejected
         # by default, since CAcert.org CA certificate is seldom shipped.
-        return optionsForClientTLS(hostname.decode("ascii"),
-            trustRoot=platformTrust(),
-            extraCertificateOptions={
-                'method': self._ssl_method,
-            })
+        return optionsForClientTLS(
+            hostname=hostname.decode("ascii"),
+            trustRoot=platformTrust(),
+            extraCertificateOptions={'method': self._ssl_method},
+        )
diff --git a/scrapy/core/downloader/handlers/s3.py b/scrapy/core/downloader/handlers/s3.py
index 40a1fa48ecd2970a6244eb5ce9385a5dd89fa6b8..8f63ad9742749eaeb5b5f1f0ef120ff05cf0ab76 100644
--- a/scrapy/core/downloader/handlers/s3.py
+++ b/scrapy/core/downloader/handlers/s3.py
@@ -100,11 +100,12 @@ class S3DownloadHandler:
                 url=url, headers=awsrequest.headers.items())
         else:
             signed_headers = self.conn.make_request(
-                    method=request.method,
-                    bucket=bucket,
-                    key=unquote(p.path),
-                    query_args=unquote(p.query),
-                    headers=request.headers,
-                    data=request.body)
+                method=request.method,
+                bucket=bucket,
+                key=unquote(p.path),
+                query_args=unquote(p.query),
+                headers=request.headers,
+                data=request.body,
+            )
             request = request.replace(url=url, headers=signed_headers)
             return self._download_http(request, spider)
diff --git a/scrapy/core/downloader/webclient.py b/scrapy/core/downloader/webclient.py
index a90a77b2b176e15a9b5a4904f169caefbbdafe1c..355045d748077b2f4e1d0187f0ab71f254e85428 100644
--- a/scrapy/core/downloader/webclient.py
+++ b/scrapy/core/downloader/webclient.py
@@ -88,8 +88,8 @@ class ScrapyHTTPPageGetter(HTTPClient):
         self.transport.stopProducing()

         self.factory.noPage(
-            defer.TimeoutError("Getting %s took longer than %s seconds." %
-                (self.factory.url, self.factory.timeout)))
+            defer.TimeoutError("Getting %s took longer than %s seconds."
+                               % (self.factory.url, self.factory.timeout)))


 class ScrapyHTTPClientFactory(HTTPClientFactory):
diff --git a/scrapy/downloadermiddlewares/retry.py b/scrapy/downloadermiddlewares/retry.py
index bbf5fca051269dd6c3a1714fb314057e6acdec56..6d11af5b22f548d7231ace69d3cd56d8b23a4d3d 100644
--- a/scrapy/downloadermiddlewares/retry.py
+++ b/scrapy/downloadermiddlewares/retry.py
@@ -12,9 +12,15 @@ once the spider has finished crawling all regular (non failed) pages.
 import logging

 from twisted.internet import defer
-from twisted.internet.error import TimeoutError, DNSLookupError, \
-    ConnectionRefusedError, ConnectionDone, ConnectError, \
-    ConnectionLost, TCPTimedOutError
+from twisted.internet.error import (
+    ConnectError,
+    ConnectionDone,
+    ConnectionLost,
+    ConnectionRefusedError,
+    DNSLookupError,
+    TCPTimedOutError,
+    TimeoutError,
+)
 from twisted.web.client import ResponseFailed

 from scrapy.exceptions import NotConfigured
diff --git a/scrapy/spiderloader.py b/scrapy/spiderloader.py
index 3be5aaec57c20edc097ea75cb79601b8d9cfe6dd..8dc89c2e911b4a57fbba2ce96baee5950134c1a8 100644
--- a/scrapy/spiderloader.py
+++ b/scrapy/spiderloader.py
@@ -24,15 +24,17 @@ class SpiderLoader:
         self._load_all_spiders()

     def _check_name_duplicates(self):
-        dupes = ["\n".join("  {cls} named {name!r} (in {module})".format(
-                    module=mod, cls=cls, name=name)
-                 for (mod, cls) in locations)
-                for name, locations in self._found.items()
-                if len(locations) > 1]
+        dupes = []
+        for name, locations in self._found.items():
+            dupes.extend([
+                "  {cls} named {name!r} (in {module})".format(module=mod, cls=cls, name=name)
+                for mod, cls in locations
+            ])
+
         if dupes:
+            dupes_string = "\n\n".join(dupes)
             msg = ("There are several spiders with the same name:\n\n"
-                   "{}\n\n  This can cause unexpected behavior.".format(
-                       "\n\n".join(dupes)))
+                   "{}\n\n  This can cause unexpected behavior.".format(dupes_string))
             warnings.warn(msg, UserWarning)

     def _load_spiders(self, module):
@@ -45,11 +47,12 @@ class SpiderLoader:
         try:
             for module in walk_modules(name):
                 self._load_spiders(module)
-        except ImportError as e:
+        except ImportError:
             if self.warn_only:
-                msg = ("\n{tb}Could not load spiders from module '{modname}'. "
-                    "See above traceback for details.".format(
-                        modname=name, tb=traceback.format_exc()))
+                msg = (
+                    "\n{tb}Could not load spiders from module '{modname}'. "
" + "See above traceback for details.".format(modname=name, tb=traceback.format_exc()) + ) warnings.warn(msg, RuntimeWarning) else: raise diff --git a/tests/test_downloadermiddleware_cookies.py b/tests/test_downloadermiddleware_cookies.py index b686a14d672d54c7758ad8db54182dcc473aee05..f86c50f50a13826159aa813e5c819fb55f86f3c3 100644 --- a/tests/test_downloadermiddleware_cookies.py +++ b/tests/test_downloadermiddleware_cookies.py @@ -139,10 +139,12 @@ class CookiesMiddlewareTest(TestCase): def test_complex_cookies(self): # merge some cookies into jar - cookies = [{'name': 'C1', 'value': 'value1', 'path': '/foo', 'domain': 'scrapytest.org'}, - {'name': 'C2', 'value': 'value2', 'path': '/bar', 'domain': 'scrapytest.org'}, - {'name': 'C3', 'value': 'value3', 'path': '/foo', 'domain': 'scrapytest.org'}, - {'name': 'C4', 'value': 'value4', 'path': '/foo', 'domain': 'scrapy.org'}] + cookies = [ + {'name': 'C1', 'value': 'value1', 'path': '/foo', 'domain': 'scrapytest.org'}, + {'name': 'C2', 'value': 'value2', 'path': '/bar', 'domain': 'scrapytest.org'}, + {'name': 'C3', 'value': 'value3', 'path': '/foo', 'domain': 'scrapytest.org'}, + {'name': 'C4', 'value': 'value4', 'path': '/foo', 'domain': 'scrapy.org'}, + ] req = Request('http://scrapytest.org/', cookies=cookies) self.mw.process_request(req, self.spider) diff --git a/tests/test_downloadermiddleware_retry.py b/tests/test_downloadermiddleware_retry.py index 9c989977e8dd7160968026b92b86fe10f56e4154..e118750e39dca38bddb060beb6c6dc525709bb39 100644 --- a/tests/test_downloadermiddleware_retry.py +++ b/tests/test_downloadermiddleware_retry.py @@ -1,8 +1,14 @@ import unittest from twisted.internet import defer -from twisted.internet.error import TimeoutError, DNSLookupError, \ - ConnectionRefusedError, ConnectionDone, ConnectError, \ - ConnectionLost, TCPTimedOutError +from twisted.internet.error import ( + ConnectError, + ConnectionDone, + ConnectionLost, + ConnectionRefusedError, + DNSLookupError, + TCPTimedOutError, + TimeoutError, +) from twisted.web.client import ResponseFailed from scrapy.downloadermiddlewares.retry import RetryMiddleware diff --git a/tests/test_http_request.py b/tests/test_http_request.py index 3b6d119a9880f2e30fa388b6be63f41018dd6253..77da15ce683c973356403eb0590fbe203cf92045 100644 --- a/tests/test_http_request.py +++ b/tests/test_http_request.py @@ -502,11 +502,13 @@ class FormRequestTest(RequestTest): def test_from_response_duplicate_form_key(self): response = _buildresponse( - '
-                url='http://www.example.com')
-        req = self.request_class.from_response(response,
-            method='GET',
-            formdata=(('foo', 'bar'), ('foo', 'baz')))
+            '<form></form>',
+            url='http://www.example.com')
+        req = self.request_class.from_response(
+            response=response,
+            method='GET',
+            formdata=(('foo', 'bar'), ('foo', 'baz')),
+        )
         self.assertEqual(urlparse(req.url).hostname, 'www.example.com')
         self.assertEqual(urlparse(req.url).query, 'foo=bar&foo=baz')
@@ -530,9 +532,11 @@ class FormRequestTest(RequestTest):
             """)
-        req = self.request_class.from_response(response,
-            formdata={'one': ['two', 'three'], 'six': 'seven'},
-            headers={"Accept-Encoding": "gzip,deflate"})
+        req = self.request_class.from_response(
+            response=response,
+            formdata={'one': ['two', 'three'], 'six': 'seven'},
+            headers={"Accept-Encoding": "gzip,deflate"},
+        )
         self.assertEqual(req.method, 'POST')
         self.assertEqual(req.headers['Content-type'], b'application/x-www-form-urlencoded')
         self.assertEqual(req.headers['Accept-Encoding'], b'gzip,deflate')
@@ -580,9 +584,9 @@ class FormRequestTest(RequestTest):
     def test_from_response_override_method(self):
         response = _buildresponse(
-                '''<html><body>
-                <form action="/app"></form>
-                </body></html>''')
+            '''<html><body>
+            <form action="/app"></form>
+            </body></html>''')
         request = FormRequest.from_response(response)
         self.assertEqual(request.method, 'GET')
         request = FormRequest.from_response(response, method='POST')
         self.assertEqual(request.method, 'POST')
@@ -590,9 +594,9 @@ class FormRequestTest(RequestTest):

     def test_from_response_override_url(self):
         response = _buildresponse(
-                '''<html><body>
-                <form action="/app"></form>
-                </body></html>''')
+            '''<html><body>
+            <form action="/app"></form>
+            </body></html>''')
         request = FormRequest.from_response(response)
         self.assertEqual(request.url, 'http://example.com/app')
         request = FormRequest.from_response(response, url='http://foo.bar/absolute')
diff --git a/tests/test_pipeline_crawl.py b/tests/test_pipeline_crawl.py
index 962c3314402976ff6851371968e2a1743ce80168..24c5164736f54d7c4cd1e0c2799b156eb43055e9 100644
--- a/tests/test_pipeline_crawl.py
+++ b/tests/test_pipeline_crawl.py
@@ -44,9 +44,7 @@ class RedirectedMediaDownloadSpider(MediaDownloadSpider):
     name = 'redirectedmedia'

     def _process_url(self, url):
-        return add_or_replace_parameter(
-            self.mockserver.url('/redirect-to'),
-            'goto', url)
+        return add_or_replace_parameter(self.mockserver.url('/redirect-to'), 'goto', url)


 class FileDownloadCrawlTestCase(TestCase):
diff --git a/tests/test_webclient.py b/tests/test_webclient.py
index b657c7ab6dd54b53059cda935ee28f49023c5818..307fadb5ce36c18babd1feae2b44ac01e581ef39 100644
--- a/tests/test_webclient.py
+++ b/tests/test_webclient.py
@@ -18,6 +18,14 @@ except ImportError:
 from twisted.python.filepath import FilePath
 from twisted.protocols.policies import WrappingFactory
 from twisted.internet.defer import inlineCallbacks
+from twisted.web.test.test_webclient import (
+    ForeverTakingResource,
+    ErrorResource,
+    NoLengthResource,
+    HostHeaderResource,
+    PayloadResource,
+    BrokenDownloadResource,
+)

 from scrapy.core.downloader import webclient as client
 from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
@@ -202,11 +210,6 @@ class ScrapyHTTPPageGetterTests(unittest.TestCase):
             Headers({'Hello': ['World'], 'Foo': ['Bar']}))


-from twisted.web.test.test_webclient import ForeverTakingResource, \
-    ErrorResource, NoLengthResource, HostHeaderResource, \
-    PayloadResource, BrokenDownloadResource
-
-
 class EncodingResource(resource.Resource):
     out_encoding = 'cp1251'
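Note on the check being un-ignored (context, not part of the patch): E126 is pycodestyle's "continuation line over-indented for hanging indent" warning. The pytest.ini hunks above stop ignoring it for the listed files, and the code hunks re-indent the affected call sites so the check passes. A minimal sketch of the pattern, with placeholder names (some_function and its arguments are illustrative, not code from this patch):

    # Hanging indent deeper than one 4-space level: flagged as E126.
    result = some_function(
            first_argument,
            second_argument)

    # One of the styles used throughout this patch: a plain 4-space
    # hanging indent, one argument per line, closing bracket dedented.
    result = some_function(
        first_argument,
        second_argument,
    )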