Commit 4c12a234 (unverified)
Authored May 06, 2020 by Eugenio Lacuesta
Parent: cc23d1cb

Flake8: Remove E126

Showing 11 changed files with 96 additions and 73 deletions (+96 -73)
pytest.ini                                   +17  -17
scrapy/core/downloader/contextfactory.py      +5   -5
scrapy/core/downloader/handlers/s3.py         +7   -6
scrapy/core/downloader/webclient.py           +2   -2
scrapy/downloadermiddlewares/retry.py         +9   -3
scrapy/spiderloader.py                       +14  -11
tests/test_downloadermiddleware_cookies.py    +6   -4
tests/test_downloadermiddleware_retry.py      +9   -3
tests/test_http_request.py                   +18  -14
tests/test_pipeline_crawl.py                  +1   -3
tests/test_webclient.py                       +8   -5
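For context: E126 is pycodestyle's "continuation line over-indented for hanging indent" warning, which (roughly) fires when a hanging-indented continuation line is pushed deeper than one extra indentation level. The snippet below is a hypothetical illustration, not taken from this commit; the function and argument names are invented.

    # Can trigger E126: hanging indent pushed two levels (8 spaces) deep.
    result = build_widget(
            first_argument,
            second_argument)

    # Passes: one extra level of indentation, with an optional trailing
    # comma and the closing parenthesis on its own line.
    result = build_widget(
        first_argument,
        second_argument,
    )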
pytest.ini
@@ -50,26 +50,26 @@ flake8-ignore =
     scrapy/core/engine.py E501 E128
     scrapy/core/scheduler.py E501
     scrapy/core/scraper.py E501 E128
-    scrapy/core/spidermw.py E501 E126
+    scrapy/core/spidermw.py E501
     scrapy/core/downloader/__init__.py E501
-    scrapy/core/downloader/contextfactory.py E501 E128 E126
+    scrapy/core/downloader/contextfactory.py E501 E128
     scrapy/core/downloader/middleware.py E501
     scrapy/core/downloader/tls.py E501
-    scrapy/core/downloader/webclient.py E501 E128 E126
+    scrapy/core/downloader/webclient.py E501 E128
     scrapy/core/downloader/handlers/__init__.py E501
     scrapy/core/downloader/handlers/ftp.py E501 E128
     scrapy/core/downloader/handlers/http10.py E501
     scrapy/core/downloader/handlers/http11.py E501
-    scrapy/core/downloader/handlers/s3.py E501 E128 E126
+    scrapy/core/downloader/handlers/s3.py E501 E128
     # scrapy/downloadermiddlewares
     scrapy/downloadermiddlewares/ajaxcrawl.py E501
     scrapy/downloadermiddlewares/decompression.py E501
     scrapy/downloadermiddlewares/defaultheaders.py E501
-    scrapy/downloadermiddlewares/httpcache.py E501 E126
+    scrapy/downloadermiddlewares/httpcache.py E501
     scrapy/downloadermiddlewares/httpcompression.py E501 E128
     scrapy/downloadermiddlewares/httpproxy.py E501
     scrapy/downloadermiddlewares/redirect.py E501
-    scrapy/downloadermiddlewares/retry.py E501 E126
+    scrapy/downloadermiddlewares/retry.py E501
     scrapy/downloadermiddlewares/robotstxt.py E501
     scrapy/downloadermiddlewares/stats.py E501
     # scrapy/extensions
@@ -164,12 +164,12 @@ flake8-ignore =
     scrapy/robotstxt.py E501
     scrapy/shell.py E501
     scrapy/signalmanager.py E501
-    scrapy/spiderloader.py F841 E501 E126
+    scrapy/spiderloader.py F841 E501
     scrapy/squeues.py E128
     scrapy/statscollectors.py E501
     # tests
     tests/__init__.py E402 E501
-    tests/mockserver.py E501 E126
+    tests/mockserver.py E501
     tests/pipelines.py F841
     tests/spiders.py E501
     tests/test_closespider.py E501
@@ -181,18 +181,18 @@ flake8-ignore =
     tests/test_crawl.py E501 E741
     tests/test_crawler.py F841 E501
     tests/test_dependencies.py F841 E501
-    tests/test_downloader_handlers.py E128 E501 E126
+    tests/test_downloader_handlers.py E128 E501
     tests/test_downloadermiddleware.py E501
     tests/test_downloadermiddleware_ajaxcrawlable.py E501
-    tests/test_downloadermiddleware_cookies.py E741 E501 E128 E126
+    tests/test_downloadermiddleware_cookies.py E741 E501 E128
     tests/test_downloadermiddleware_defaultheaders.py E501
     tests/test_downloadermiddleware_downloadtimeout.py E501
     tests/test_downloadermiddleware_httpcache.py E501
-    tests/test_downloadermiddleware_httpcompression.py E501 E126
+    tests/test_downloadermiddleware_httpcompression.py E501
     tests/test_downloadermiddleware_decompression.py E501
     tests/test_downloadermiddleware_httpproxy.py E501 E128
     tests/test_downloadermiddleware_redirect.py E501 E128
-    tests/test_downloadermiddleware_retry.py E501 E128 E126
+    tests/test_downloadermiddleware_retry.py E501 E128
     tests/test_downloadermiddleware_robotstxt.py E501
     tests/test_downloadermiddleware_stats.py E501
     tests/test_dupefilters.py E501 E741 E128
@@ -202,7 +202,7 @@ flake8-ignore =
     tests/test_feedexport.py E501 F841
     tests/test_http_cookies.py E501
     tests/test_http_headers.py E501
-    tests/test_http_request.py E402 E501 E128 E128 E126
+    tests/test_http_request.py E402 E501 E128 E128
     tests/test_http_response.py E501 E128
     tests/test_item.py E128 F841
     tests/test_link.py E501
@@ -211,7 +211,7 @@ flake8-ignore =
     tests/test_logformatter.py E128 E501
     tests/test_mail.py E128 E501
     tests/test_middleware.py E501 E128
-    tests/test_pipeline_crawl.py E501 E128 E126
+    tests/test_pipeline_crawl.py E501 E128
     tests/test_pipeline_files.py E501
     tests/test_pipeline_images.py F841 E501
     tests/test_pipeline_media.py E501 E741 E128
@@ -219,7 +219,7 @@ flake8-ignore =
     tests/test_request_cb_kwargs.py E501
     tests/test_responsetypes.py E501
     tests/test_robotstxt_interface.py E501 E501
-    tests/test_scheduler.py E501 E126
+    tests/test_scheduler.py E501
     tests/test_selector.py E501
     tests/test_spider.py E501
     tests/test_spidermiddleware.py E501
@@ -243,8 +243,8 @@ flake8-ignore =
     tests/test_utils_response.py E501
     tests/test_utils_signal.py E741 F841
     tests/test_utils_sitemap.py E128 E501
-    tests/test_utils_url.py E501 E501 E126
-    tests/test_webclient.py E501 E128 E402 E126
+    tests/test_utils_url.py E501 E501
+    tests/test_webclient.py E501 E128 E402
     tests/test_cmdline/__init__.py E501
     tests/test_settings/__init__.py E501 E128
     tests/test_spiderloader/__init__.py E128 E501
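The entries above use the flake8-ignore option of the pytest-flake8 plugin: each line pairs a file path (or glob) with the error codes to suppress for matching files, so deleting E126 from a line means that file is now expected to pass the check. A minimal, hypothetical configuration of the same shape (paths and codes invented for illustration):

    [pytest]
    flake8-ignore =
        # silence long-line and continuation-indent warnings in legacy code
        mypackage/legacy.py E501 E128
        # skip flake8 entirely for generated files
        mypackage/_generated.py ALL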
scrapy/core/downloader/contextfactory.py
@@ -86,8 +86,8 @@ class BrowserLikeContextFactory(ScrapyClientContextFactory):
     #
     # This means that a website like https://www.cacert.org will be rejected
     # by default, since CAcert.org CA certificate is seldom shipped.
-        return optionsForClientTLS(hostname.decode("ascii"),
-                                   trustRoot=platformTrust(),
-                                   extraCertificateOptions={
-                                       'method': self._ssl_method,
-                                   })
+        return optionsForClientTLS(
+            hostname=hostname.decode("ascii"),
+            trustRoot=platformTrust(),
+            extraCertificateOptions={'method': self._ssl_method},
+        )
scrapy/core/downloader/handlers/s3.py
@@ -100,11 +100,12 @@ class S3DownloadHandler:
                 url=url, headers=awsrequest.headers.items())
         else:
             signed_headers = self.conn.make_request(
-                    method=request.method, bucket=bucket, key=unquote(p.path),
-                    query_args=unquote(p.query), headers=request.headers,
-                    data=request.body)
+                method=request.method,
+                bucket=bucket,
+                key=unquote(p.path),
+                query_args=unquote(p.query),
+                headers=request.headers,
+                data=request.body,
+            )
             request = request.replace(url=url, headers=signed_headers)
         return self._download_http(request, spider)
scrapy/core/downloader/webclient.py
@@ -88,8 +88,8 @@ class ScrapyHTTPPageGetter(HTTPClient):
         self.transport.stopProducing()
         self.factory.noPage(
-                defer.TimeoutError("Getting %s took longer than %s seconds."
-                    % (self.factory.url, self.factory.timeout)))
+            defer.TimeoutError("Getting %s took longer than %s seconds."
+                               % (self.factory.url, self.factory.timeout)))


 class ScrapyHTTPClientFactory(HTTPClientFactory):
scrapy/downloadermiddlewares/retry.py
@@ -12,9 +12,15 @@ once the spider has finished crawling all regular (non failed) pages.
 import logging

 from twisted.internet import defer
-from twisted.internet.error import TimeoutError, DNSLookupError, \
-        ConnectionRefusedError, ConnectionDone, ConnectError, \
-        ConnectionLost, TCPTimedOutError
+from twisted.internet.error import (
+    ConnectError,
+    ConnectionDone,
+    ConnectionLost,
+    ConnectionRefusedError,
+    DNSLookupError,
+    TCPTimedOutError,
+    TimeoutError,
+)
 from twisted.web.client import ResponseFailed

 from scrapy.exceptions import NotConfigured
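The parenthesized, one-name-per-line form has a practical side effect worth noting: later changes to the import list touch a single line each. A hypothetical follow-up that needed one more Twisted exception class would diff roughly like this (ConnectingCancelledError is only an example addition, not part of this commit):

 from twisted.internet.error import (
     ConnectError,
+    ConnectingCancelledError,
     ConnectionDone,
     ConnectionLost,
     ConnectionRefusedError,
     DNSLookupError,
     TCPTimedOutError,
     TimeoutError,
 )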
scrapy/spiderloader.py
@@ -24,15 +24,17 @@ class SpiderLoader:
         self._load_all_spiders()

     def _check_name_duplicates(self):
-        dupes = ["\n".join("  {cls} named {name!r} (in {module})".format(
-                                module=mod, cls=cls, name=name)
-                           for (mod, cls) in locations)
-                 for name, locations in self._found.items()
-                 if len(locations) > 1]
+        dupes = []
+        for name, locations in self._found.items():
+            dupes.extend([
+                "  {cls} named {name!r} (in {module})".format(
+                    module=mod, cls=cls, name=name)
+                for mod, cls in locations
+            ])
         if dupes:
+            dupes_string = "\n\n".join(dupes)
             msg = ("There are several spiders with the same name:\n\n"
-                   "{}\n\n  This can cause unexpected behavior.".format(
-                        "\n\n".join(dupes)))
+                   "{}\n\n  This can cause unexpected behavior.".format(dupes_string))
             warnings.warn(msg, UserWarning)

     def _load_spiders(self, module):
@@ -45,11 +47,12 @@ class SpiderLoader:
         try:
             for module in walk_modules(name):
                 self._load_spiders(module)
-        except ImportError as e:
+        except ImportError:
             if self.warn_only:
-                msg = ("\n{tb}Could not load spiders from module '{modname}'. "
-                       "See above traceback for details.".format(
-                            modname=name, tb=traceback.format_exc()))
+                msg = ("\n{tb}Could not load spiders from module '{modname}'. "
+                       "See above traceback for details."
+                       .format(modname=name, tb=traceback.format_exc())
+                       )
                 warnings.warn(msg, RuntimeWarning)
             else:
                 raise
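To make the refactored duplicate check easier to follow, here is a small self-contained sketch of the same message-building shape operating on a made-up mapping; the spider names and modules are invented and only mirror the structure of the code above.

    import warnings

    # Hypothetical data: spider name -> list of (module, class) pairs where it was found.
    found = {
        "example": [
            ("myproject.spiders.a", "ExampleSpider"),
            ("myproject.spiders.b", "ExampleSpider"),
        ],
    }

    dupes = []
    for name, locations in found.items():
        dupes.extend([
            "  {cls} named {name!r} (in {module})".format(module=mod, cls=cls, name=name)
            for mod, cls in locations
        ])

    if dupes:
        # Warns with one indented "<class> named '<name>' (in <module>)" entry
        # per location, separated by blank lines.
        dupes_string = "\n\n".join(dupes)
        msg = ("There are several spiders with the same name:\n\n"
               "{}\n\n  This can cause unexpected behavior.".format(dupes_string))
        warnings.warn(msg, UserWarning)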
tests/test_downloadermiddleware_cookies.py
@@ -139,10 +139,12 @@ class CookiesMiddlewareTest(TestCase):
     def test_complex_cookies(self):
         # merge some cookies into jar
-        cookies = [{'name': 'C1', 'value': 'value1', 'path': '/foo', 'domain': 'scrapytest.org'},
-                   {'name': 'C2', 'value': 'value2', 'path': '/bar', 'domain': 'scrapytest.org'},
-                   {'name': 'C3', 'value': 'value3', 'path': '/foo', 'domain': 'scrapytest.org'},
-                   {'name': 'C4', 'value': 'value4', 'path': '/foo', 'domain': 'scrapy.org'}]
+        cookies = [
+            {'name': 'C1', 'value': 'value1', 'path': '/foo', 'domain': 'scrapytest.org'},
+            {'name': 'C2', 'value': 'value2', 'path': '/bar', 'domain': 'scrapytest.org'},
+            {'name': 'C3', 'value': 'value3', 'path': '/foo', 'domain': 'scrapytest.org'},
+            {'name': 'C4', 'value': 'value4', 'path': '/foo', 'domain': 'scrapy.org'},
+        ]
         req = Request('http://scrapytest.org/', cookies=cookies)
         self.mw.process_request(req, self.spider)
tests/test_downloadermiddleware_retry.py
 import unittest

 from twisted.internet import defer
-from twisted.internet.error import TimeoutError, DNSLookupError, \
-        ConnectionRefusedError, ConnectionDone, ConnectError, \
-        ConnectionLost, TCPTimedOutError
+from twisted.internet.error import (
+    ConnectError,
+    ConnectionDone,
+    ConnectionLost,
+    ConnectionRefusedError,
+    DNSLookupError,
+    TCPTimedOutError,
+    TimeoutError,
+)
 from twisted.web.client import ResponseFailed

 from scrapy.downloadermiddlewares.retry import RetryMiddleware
tests/test_http_request.py
@@ -502,11 +502,13 @@ class FormRequestTest(RequestTest):
     def test_from_response_duplicate_form_key(self):
         response = _buildresponse(
-                '<form></form>',
-                url='http://www.example.com')
-        req = self.request_class.from_response(response,
-                method='GET',
-                formdata=(('foo', 'bar'), ('foo', 'baz')))
+            '<form></form>',
+            url='http://www.example.com')
+        req = self.request_class.from_response(
+            response=response,
+            method='GET',
+            formdata=(('foo', 'bar'), ('foo', 'baz')),
+        )
         self.assertEqual(urlparse(req.url).hostname, 'www.example.com')
         self.assertEqual(urlparse(req.url).query, 'foo=bar&foo=baz')
@@ -530,9 +532,11 @@ class FormRequestTest(RequestTest):
         <input type="hidden" name="test" value="val2">
         <input type="hidden" name="test2" value="xxx">
         </form>""")
-        req = self.request_class.from_response(response,
-                formdata={'one': ['two', 'three'], 'six': 'seven'},
-                headers={"Accept-Encoding": "gzip,deflate"})
+        req = self.request_class.from_response(
+            response=response,
+            formdata={'one': ['two', 'three'], 'six': 'seven'},
+            headers={"Accept-Encoding": "gzip,deflate"},
+        )
         self.assertEqual(req.method, 'POST')
         self.assertEqual(req.headers['Content-type'], b'application/x-www-form-urlencoded')
         self.assertEqual(req.headers['Accept-Encoding'], b'gzip,deflate')
@@ -580,9 +584,9 @@ class FormRequestTest(RequestTest):
     def test_from_response_override_method(self):
         response = _buildresponse(
-                '''<html><body>
-                <form action="/app"></form>
-                </body></html>''')
+            '''<html><body>
+            <form action="/app"></form>
+            </body></html>''')
         request = FormRequest.from_response(response)
         self.assertEqual(request.method, 'GET')
         request = FormRequest.from_response(response, method='POST')
@@ -590,9 +594,9 @@ class FormRequestTest(RequestTest):
     def test_from_response_override_url(self):
         response = _buildresponse(
-                '''<html><body>
-                <form action="/app"></form>
-                </body></html>''')
+            '''<html><body>
+            <form action="/app"></form>
+            </body></html>''')
         request = FormRequest.from_response(response)
         self.assertEqual(request.url, 'http://example.com/app')
         request = FormRequest.from_response(response, url='http://foo.bar/absolute')
tests/test_pipeline_crawl.py
@@ -44,9 +44,7 @@ class RedirectedMediaDownloadSpider(MediaDownloadSpider):
     name = 'redirectedmedia'

     def _process_url(self, url):
-        return add_or_replace_parameter(
-            self.mockserver.url('/redirect-to'),
-            'goto', url)
+        return add_or_replace_parameter(self.mockserver.url('/redirect-to'), 'goto', url)


 class FileDownloadCrawlTestCase(TestCase):
tests/test_webclient.py
@@ -18,6 +18,14 @@ except ImportError:
 from twisted.python.filepath import FilePath
 from twisted.protocols.policies import WrappingFactory
 from twisted.internet.defer import inlineCallbacks
+from twisted.web.test.test_webclient import (
+    ForeverTakingResource,
+    ErrorResource,
+    NoLengthResource,
+    HostHeaderResource,
+    PayloadResource,
+    BrokenDownloadResource,
+)

 from scrapy.core.downloader import webclient as client
 from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
@@ -202,11 +210,6 @@ class ScrapyHTTPPageGetterTests(unittest.TestCase):
             Headers({'Hello': ['World'], 'Foo': ['Bar']}))


-from twisted.web.test.test_webclient import ForeverTakingResource, \
-    ErrorResource, NoLengthResource, HostHeaderResource, \
-    PayloadResource, BrokenDownloadResource
-
-
 class EncodingResource(resource.Resource):
     out_encoding = 'cp1251'