Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
looyolo
scrapy
提交
79816980
S
scrapy
项目概览
looyolo
/
scrapy
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
scrapy
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
79816980
编写于
2月 28, 2012
作者:
D
Daniel Graña
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Adapt response encoding detection to pass test cases
上级
c476681c
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
17 additions
and
32 deletions
+17
-32
scrapy/http/response/text.py
scrapy/http/response/text.py
+15
-19
scrapy/tests/test_http_response.py
scrapy/tests/test_http_response.py
+2
-2
scrapy/utils/encoding.py
scrapy/utils/encoding.py
+0
-11
未找到文件。
scrapy/http/response/text.py
浏览文件 @
79816980
...
...
@@ -6,10 +6,9 @@ See documentation in docs/topics/request-response.rst
"""
from
w3lib.encoding
import
html_to_unicode
,
resolve_encoding
,
\
html_body_declared_encoding
,
http_content_type_encoding
html_body_declared_encoding
,
http_content_type_encoding
,
to_unicode
from
scrapy.http.response
import
Response
from
scrapy.utils.python
import
memoizemethod_noargs
from
scrapy.utils.encoding
import
encoding_exists
from
scrapy.conf
import
settings
...
...
@@ -48,17 +47,7 @@ class TextResponse(Response):
@
property
def
encoding
(
self
):
return
self
.
_get_encoding
(
infer
=
True
)
def
_get_encoding
(
self
,
infer
=
False
):
enc
=
self
.
_declared_encoding
()
if
enc
and
not
encoding_exists
(
enc
):
enc
=
None
if
not
enc
and
infer
:
enc
=
self
.
_body_inferred_encoding
()
if
not
enc
:
enc
=
self
.
_DEFAULT_ENCODING
return
resolve_encoding
(
enc
)
return
self
.
_declared_encoding
()
or
self
.
_body_inferred_encoding
()
def
_declared_encoding
(
self
):
return
self
.
_encoding
or
self
.
_headers_encoding
()
\
...
...
@@ -67,7 +56,7 @@ class TextResponse(Response):
def
body_as_unicode
(
self
):
"""Return body as unicode"""
if
self
.
_cached_ubody
is
None
:
self
.
_cached_ubody
=
self
.
body
.
decode
(
self
.
encoding
,
'scrapy_replace'
)
self
.
_cached_ubody
=
to_unicode
(
self
.
body
,
self
.
encoding
)
return
self
.
_cached_ubody
@
memoizemethod_noargs
...
...
@@ -78,14 +67,21 @@ class TextResponse(Response):
def
_body_inferred_encoding
(
self
):
if
self
.
_cached_benc
is
None
:
content_type
=
self
.
headers
.
get
(
'Content-Type'
)
benc
,
_
=
html_to_unicode
(
content_type
,
self
.
body
,
default_encoding
=
self
.
_DEFAULT_ENCODING
)
benc
,
ubody
=
html_to_unicode
(
content_type
,
self
.
body
,
\
auto_detect_fun
=
self
.
_auto_detect_fun
,
\
default_encoding
=
self
.
_DEFAULT_ENCODING
)
self
.
_cached_benc
=
benc
# XXX: is this needed?
# UnicodeDammit is buggy decoding utf-16
#if self._cached_ubody is None and benc != 'utf-16':
# self._cached_ubody = dammit.unicode
self
.
_cached_ubody
=
ubody
return
self
.
_cached_benc
def
_auto_detect_fun
(
self
,
text
):
for
enc
in
(
self
.
_DEFAULT_ENCODING
,
'utf-8'
,
'cp1252'
):
try
:
text
.
decode
(
enc
)
except
UnicodeError
:
continue
return
resolve_encoding
(
enc
)
@memoizemethod_noargs
def _body_declared_encoding(self):
    """Return what ``html_body_declared_encoding`` reports for ``self.body``.

    The method is wrapped in ``memoizemethod_noargs``.
    NOTE(review): presumably the result is the encoding declared inside
    the document body itself - confirm against the w3lib helper.
    """
    raw_body = self.body
    return html_body_declared_encoding(raw_body)
scrapy/tests/test_http_response.py
浏览文件 @
79816980
...
...
@@ -171,8 +171,8 @@ class TextResponseTest(BaseResponseTest):
self
.
assertEqual
(
r2
.
_headers_encoding
(),
None
)
self
.
assertEqual
(
r2
.
_declared_encoding
(),
'utf-8'
)
self
.
_assert_response_encoding
(
r2
,
'utf-8'
)
self
.
assertEqual
(
r3
.
_headers_encoding
(),
"
iso-8859-1
"
)
self
.
assertEqual
(
r3
.
_declared_encoding
(),
"
iso-8859-1
"
)
self
.
assertEqual
(
r3
.
_headers_encoding
(),
"
cp1252
"
)
self
.
assertEqual
(
r3
.
_declared_encoding
(),
"
cp1252
"
)
self
.
assertEqual
(
r4
.
_headers_encoding
(),
None
)
self
.
assertEqual
(
r5
.
_headers_encoding
(),
None
)
self
.
_assert_response_encoding
(
r5
,
"utf-8"
)
...
...
scrapy/utils/encoding.py
已删除
100644 → 0
浏览文件 @
c476681c
import
codecs
from
w3lib.encoding
import
resolve_encoding
def encoding_exists(encoding):
    """Tell whether ``encoding`` names a codec that can be looked up.

    The name is first passed through ``resolve_encoding`` and the result
    is handed to ``codecs.lookup``. A ``LookupError`` raised along the way
    yields ``False``; otherwise the function returns ``True``.
    """
    try:
        canonical_name = resolve_encoding(encoding)
        codecs.lookup(canonical_name)
    except LookupError:
        return False
    else:
        return True
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录