Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
looyolo
scrapy
提交
d0af0086
S
scrapy
项目概览
looyolo
/
scrapy
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
scrapy
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
d0af0086
编写于
12月 16, 2020
作者:
A
Adrián Chaves
提交者:
GitHub
12月 16, 2020
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4878 from elacuesta/response-protocol-attribute
Response.protocol attribute
上级
212163e1
2405df49
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
70 additions
and
6 deletions
+70
-6
docs/topics/request-response.rst
docs/topics/request-response.rst
+23
-0
scrapy/core/downloader/handlers/http11.py
scrapy/core/downloader/handlers/http11.py
+6
-0
scrapy/core/downloader/webclient.py
scrapy/core/downloader/webclient.py
+2
-2
scrapy/http/response/__init__.py
scrapy/http/response/__init__.py
+16
-4
tests/test_downloader_handlers.py
tests/test_downloader_handlers.py
+23
-0
未找到文件。
docs/topics/request-response.rst
浏览文件 @
d0af0086
...
...
@@ -693,9 +693,19 @@ Response objects
:param ip_address: The IP address of the server from which the Response originated.
:type ip_address: :class:`ipaddress.IPv4Address` or :class:`ipaddress.IPv6Address`
:param protocol: The protocol that was used to download the response.
For instance: "HTTP/1.0", "HTTP/1.1"
:type protocol: :class:`str`
.. versionadded:: 2.0.0
The ``certificate`` parameter.
.. versionadded:: 2.1.0
The ``ip_address`` parameter.
.. versionadded:: VERSION
The ``protocol`` parameter.
.. attribute:: Response.url
A string containing the URL of the response.
...
...
@@ -780,6 +790,8 @@ Response objects
.. attribute:: Response.certificate
.. versionadded:: 2.0.0
A :class:`twisted.internet.ssl.Certificate` object representing
the server's SSL certificate.
...
...
@@ -795,6 +807,17 @@ Response objects
handler, i.e. for ``http(s)`` responses. For other handlers,
:attr:`ip_address` is always ``None``.
.. attribute:: Response.protocol
.. versionadded:: VERSION
The protocol that was used to download the response.
For instance: "HTTP/1.0", "HTTP/1.1"
This attribute is currently only populated by the HTTP download
handlers, i.e. for ``http(s)`` responses. For other handlers,
:attr:`protocol` is always ``None``.
.. method:: Response.copy()
Returns a new Response which is a copy of this Response.
...
...
scrapy/core/downloader/handlers/http11.py
浏览文件 @
d0af0086
...
...
@@ -434,6 +434,11 @@ class ScrapyAgent:
def
_cb_bodydone
(
self
,
result
,
request
,
url
):
headers
=
Headers
(
result
[
"txresponse"
].
headers
.
getAllRawHeaders
())
respcls
=
responsetypes
.
from_args
(
headers
=
headers
,
url
=
url
,
body
=
result
[
"body"
])
try
:
version
=
result
[
"txresponse"
].
version
protocol
=
f
"
{
to_unicode
(
version
[
0
])
}
/
{
version
[
1
]
}
.
{
version
[
2
]
}
"
except
(
AttributeError
,
TypeError
,
IndexError
):
protocol
=
None
response
=
respcls
(
url
=
url
,
status
=
int
(
result
[
"txresponse"
].
code
),
...
...
@@ -442,6 +447,7 @@ class ScrapyAgent:
flags
=
result
[
"flags"
],
certificate
=
result
[
"certificate"
],
ip_address
=
result
[
"ip_address"
],
protocol
=
protocol
,
)
if
result
.
get
(
"failure"
):
result
[
"failure"
].
value
.
response
=
response
...
...
scrapy/core/downloader/webclient.py
浏览文件 @
d0af0086
...
...
@@ -7,7 +7,7 @@ from twisted.internet.protocol import ClientFactory
from
scrapy.http
import
Headers
from
scrapy.utils.httpobj
import
urlparse_cached
from
scrapy.utils.python
import
to_bytes
from
scrapy.utils.python
import
to_bytes
,
to_unicode
from
scrapy.responsetypes
import
responsetypes
...
...
@@ -110,7 +110,7 @@ class ScrapyHTTPClientFactory(ClientFactory):
status
=
int
(
self
.
status
)
headers
=
Headers
(
self
.
response_headers
)
respcls
=
responsetypes
.
from_args
(
headers
=
headers
,
url
=
self
.
_url
)
return
respcls
(
url
=
self
.
_url
,
status
=
status
,
headers
=
headers
,
body
=
body
)
return
respcls
(
url
=
self
.
_url
,
status
=
status
,
headers
=
headers
,
body
=
body
,
protocol
=
to_unicode
(
self
.
version
)
)
def
_set_connection_attributes
(
self
,
request
):
parsed
=
urlparse_cached
(
request
)
...
...
scrapy/http/response/__init__.py
浏览文件 @
d0af0086
...
...
@@ -17,8 +17,18 @@ from scrapy.utils.trackref import object_ref
class
Response
(
object_ref
):
def
__init__
(
self
,
url
,
status
=
200
,
headers
=
None
,
body
=
b
''
,
flags
=
None
,
request
=
None
,
certificate
=
None
,
ip_address
=
None
):
def
__init__
(
self
,
url
,
status
=
200
,
headers
=
None
,
body
=
b
""
,
flags
=
None
,
request
=
None
,
certificate
=
None
,
ip_address
=
None
,
protocol
=
None
,
):
self
.
headers
=
Headers
(
headers
or
{})
self
.
status
=
int
(
status
)
self
.
_set_body
(
body
)
...
...
@@ -27,6 +37,7 @@ class Response(object_ref):
self
.
flags
=
[]
if
flags
is
None
else
list
(
flags
)
self
.
certificate
=
certificate
self
.
ip_address
=
ip_address
self
.
protocol
=
protocol
@
property
def
cb_kwargs
(
self
):
...
...
@@ -89,8 +100,9 @@ class Response(object_ref):
"""Create a new Response with the same attributes except for those
given new values.
"""
for
x
in
[
'url'
,
'status'
,
'headers'
,
'body'
,
'request'
,
'flags'
,
'certificate'
,
'ip_address'
]:
for
x
in
[
"url"
,
"status"
,
"headers"
,
"body"
,
"request"
,
"flags"
,
"certificate"
,
"ip_address"
,
"protocol"
,
]:
kwargs
.
setdefault
(
x
,
getattr
(
self
,
x
))
cls
=
kwargs
.
pop
(
'cls'
,
self
.
__class__
)
return
cls
(
*
args
,
**
kwargs
)
...
...
tests/test_downloader_handlers.py
浏览文件 @
d0af0086
...
...
@@ -115,6 +115,7 @@ class FileTestCase(unittest.TestCase):
self
.
assertEqual
(
response
.
url
,
request
.
url
)
self
.
assertEqual
(
response
.
status
,
200
)
self
.
assertEqual
(
response
.
body
,
b
'0123456789'
)
self
.
assertEqual
(
response
.
protocol
,
None
)
request
=
Request
(
path_to_file_uri
(
self
.
tmpname
+
'^'
))
assert
request
.
url
.
upper
().
endswith
(
'%5E'
)
...
...
@@ -360,6 +361,13 @@ class Http10TestCase(HttpTestCase):
"""HTTP 1.0 test case"""
download_handler_cls
=
HTTP10DownloadHandler
def test_protocol(self):
    """The HTTP/1.0 download handler exposes "HTTP/1.0" as Response.protocol."""
    req = Request(self.getURL("host"), method="GET")
    deferred = self.download_request(req, Spider("foo"))

    def extract_protocol(response):
        # Pass only the protocol string down the callback chain.
        return response.protocol

    deferred.addCallback(extract_protocol)
    deferred.addCallback(self.assertEqual, "HTTP/1.0")
    return deferred
class
Https10TestCase
(
Http10TestCase
):
scheme
=
'https'
...
...
@@ -489,6 +497,13 @@ class Http11TestCase(HttpTestCase):
def
test_download_broken_chunked_content_allow_data_loss_via_setting
(
self
):
return
self
.
test_download_broken_content_allow_data_loss_via_setting
(
'broken-chunked'
)
def test_protocol(self):
    """The HTTP/1.1 download handler exposes "HTTP/1.1" as Response.protocol."""
    req = Request(self.getURL("host"), method="GET")
    deferred = self.download_request(req, Spider("foo"))

    def extract_protocol(response):
        # Pass only the protocol string down the callback chain.
        return response.protocol

    deferred.addCallback(extract_protocol)
    deferred.addCallback(self.assertEqual, "HTTP/1.1")
    return deferred
class
Https11TestCase
(
Http11TestCase
):
scheme
=
'https'
...
...
@@ -962,6 +977,7 @@ class BaseFTPTestCase(unittest.TestCase):
self
.
assertEqual
(
r
.
status
,
200
)
self
.
assertEqual
(
r
.
body
,
b
'I have the power!'
)
self
.
assertEqual
(
r
.
headers
,
{
b
'Local Filename'
:
[
b
''
],
b
'Size'
:
[
b
'17'
]})
self
.
assertIsNone
(
r
.
protocol
)
return
self
.
_add_test_callbacks
(
d
,
_test
)
def
test_ftp_download_path_with_spaces
(
self
):
...
...
@@ -1120,3 +1136,10 @@ class DataURITestCase(unittest.TestCase):
request
=
Request
(
'data:text/plain;base64,SGVsbG8sIHdvcmxkLg%3D%3D'
)
return
self
.
download_request
(
request
,
self
.
spider
).
addCallback
(
_test
)
def test_protocol(self):
    """Data-URI responses are not downloaded over HTTP, so protocol is None."""

    def check_no_protocol(response):
        self.assertIsNone(response.protocol)

    req = Request("data:,")
    d = self.download_request(req, self.spider)
    return d.addCallback(check_no_protocol)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录