Commit a69f042d
Authored Nov 19, 2014 by tpeng

add 2 more test cases and minor doc fixes

Parent: fa84730e
Showing 4 changed files with 48 additions and 7 deletions (+48, -7):

docs/topics/settings.rst              +7  -3
scrapy/settings/default_settings.py   +2  -2
tests/mockserver.py                   +5  -2
tests/test_downloader_handlers.py     +34 -0
docs/topics/settings.rst @ a69f042d

@@ -427,7 +427,7 @@ The amount of time (in secs) that the downloader will wait before timing out.
 DOWNLOAD_MAXSIZE
 ----------------
 
-Default: `1073741824` (1024Mb)
+Default: `1073741824` (1024MB)
 
 The maximum response size (in bytes) that downloader will download.
 
@@ -439,12 +439,14 @@ If you want to disable it set to 0.
     spider attribute and per-request using :reqmeta:`download_maxsize`
     Request.meta key.
 
     This feature needs Twisted >= 11.1.
 
+.. setting:: DOWNLOAD_WARNSIZE
+
 DOWNLOAD_WARNSIZE
-----------------
+-----------------
 
-Default: `33554432` (32Mb)
+Default: `33554432` (32MB)
 
 The response size (in bytes) that downloader will start to warn.
 
@@ -456,6 +458,8 @@ If you want to disable it set to 0.
     spider attribute and per-request using :reqmeta:`download_warnsize`
     Request.meta key.
 
     This feature needs Twisted >= 11.1.
 
+.. setting:: DUPEFILTER_CLASS
+
 DUPEFILTER_CLASS
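As the documentation hunks above note, both limits can also be set per spider (as attributes) or per request (via the Request.meta keys). A minimal sketch of that usage, not part of the commit; the spider name and URL are hypothetical placeholders:

    from scrapy import Request, Spider

    class MySpider(Spider):
        name = 'size_limited'
        # Per-spider overrides of the project-wide settings
        download_maxsize = 10 * 1024 * 1024    # abort responses larger than 10 MB
        download_warnsize = 1 * 1024 * 1024    # warn above 1 MB

        def start_requests(self):
            # Per-request override via the download_maxsize Request.meta key
            yield Request('http://example.com/big-file',
                          meta={'download_maxsize': 2 * 1024 * 1024})

        def parse(self, response):
            return {'length': len(response.body)}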
scrapy/settings/default_settings.py @ a69f042d

@@ -66,8 +66,8 @@ DOWNLOAD_HANDLERS_BASE = {
 
 DOWNLOAD_TIMEOUT = 180      # 3mins
 
-DOWNLOAD_MAXSIZE = 1073741824   # 1024m
-DOWNLOAD_WARNSIZE = 33554432    # 32m
+DOWNLOAD_MAXSIZE = 1024*1024*1024   # 1024m
+DOWNLOAD_WARNSIZE = 32*1024*1024    # 32m
 
 DOWNLOADER = 'scrapy.core.downloader.Downloader'
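The new expressions are just a more readable spelling of the same byte counts as the old literals; a quick interactive check:

    >>> 1024 * 1024 * 1024   # DOWNLOAD_MAXSIZE, 1024 MB
    1073741824
    >>> 32 * 1024 * 1024     # DOWNLOAD_WARNSIZE, 32 MB
    33554432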
tests/mockserver.py @ a69f042d

 from __future__ import print_function
 import sys, time, random, urllib, os, json
 from subprocess import Popen, PIPE
-from twisted.web.server import Site, NOT_DONE_YET
-from twisted.web.resource import Resource
+from twisted.web.server import Site, NOT_DONE_YET, GzipEncoderFactory
+from twisted.web.resource import Resource, EncodingResourceWrapper
 from twisted.internet import reactor, defer, ssl
+from twisted.web.test.test_webclient import PayloadResource
 from scrapy import twisted_version
 
@@ -167,6 +168,8 @@ class Root(Resource):
         self.putChild("drop", Drop())
         self.putChild("raw", Raw())
         self.putChild("echo", Echo())
+        self.putChild('payload', PayloadResource())
+        self.putChild("xpayload", EncodingResourceWrapper(PayloadResource(), [GzipEncoderFactory()]))
 
     def getChild(self, name, request):
         return self
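The new "xpayload" child wraps Twisted's PayloadResource so the body is served gzip-compressed whenever the client sends Accept-Encoding: gzip. A minimal standalone sketch of the same wiring, not part of the commit; the Payload class below is an illustrative stand-in for PayloadResource:

    from twisted.internet import reactor
    from twisted.web.resource import Resource, EncodingResourceWrapper
    from twisted.web.server import Site, GzipEncoderFactory

    class Payload(Resource):
        """Stand-in for twisted.web.test.test_webclient.PayloadResource."""
        isLeaf = True

        def render_GET(self, request):
            return b'1' * 100  # small, highly compressible body

    root = Resource()
    root.putChild(b'payload', Payload())                 # plain responses
    root.putChild(b'xpayload', EncodingResourceWrapper(  # gzip-encoded responses
        Payload(), [GzipEncoderFactory()]))

    reactor.listenTCP(8998, Site(root))
    reactor.run()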
tests/test_downloader_handlers.py @ a69f042d

@@ -220,6 +220,20 @@ class Http11TestCase(HttpTestCase):
         d.addCallback(self.assertEquals, "0123456789")
         return d
 
+    @defer.inlineCallbacks
+    def test_download_with_maxsize(self):
+        request = Request(self.getURL('file'))
+
+        # 10 is minimal size for this request and the limit is only counted on
+        # response body. (regardless of headers)
+        d = self.download_request(request, Spider('foo', download_maxsize=10))
+        d.addCallback(lambda r: r.body)
+        d.addCallback(self.assertEquals, "0123456789")
+        yield d
+
+        d = self.download_request(request, Spider('foo', download_maxsize=9))
+        yield self.assertFailure(d, defer.CancelledError, error.ConnectionAborted)
+
     @defer.inlineCallbacks
     def test_download_with_maxsize_per_req(self):
         meta = {'download_maxsize': 2}
@@ -271,6 +285,26 @@ class Http11MockServerTestCase(unittest.TestCase):
         reason = crawler.spider.meta['close_reason']
         self.assertTrue(reason, 'finished')
 
+    @defer.inlineCallbacks
+    def test_download_gzip_response(self):
+        crawler = get_crawler(SingleRequestSpider)
+        body = '1'*100  # PayloadResource requires body length to be 100
+        request = Request('http://localhost:8998/payload', method='POST',
+                          body=body, meta={'download_maxsize': 50})
+        yield crawler.crawl(seed=request)
+        failure = crawler.spider.meta['failure']
+        # download_maxsize < 100, hence the CancelledError
+        self.assertIsInstance(failure.value, defer.CancelledError)
+
+        request.headers.setdefault('Accept-Encoding', 'gzip,deflate')
+        request = request.replace(url='http://localhost:8998/xpayload')
+        yield crawler.crawl(seed=request)
+        # download_maxsize = 50 is enough for the gzipped response
+        failure = crawler.spider.meta.get('failure')
+        self.assertTrue(failure == None)
+        reason = crawler.spider.meta['close_reason']
+        self.assertTrue(reason, 'finished')
+
 
 class UriResource(resource.Resource):
     """Return the full uri that was requested"""
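test_download_gzip_response relies on the 100-byte body compressing to fewer than 50 bytes, so the gzipped transfer stays under the download_maxsize of 50 even though the decoded body would not. A quick check of that assumption, not part of the commit:

    import gzip

    body = b'1' * 100                      # the payload used by the test
    compressed = gzip.compress(body)

    print(len(body), len(compressed))      # 100 vs. roughly 20-30 bytes
    assert len(compressed) < 50            # fits under download_maxsize = 50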