Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
looyolo
scrapy
提交
10bcdb49
S
scrapy
项目概览
looyolo
/
scrapy
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
scrapy
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
10bcdb49
编写于
2月 23, 2016
作者:
E
Elias Dorneles
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1787 from scrapy/improve-errors
[MRG+1] Better tracebacks
上级
62a51716
18a38181
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
49 additions
and
23 deletions
+49
-23
scrapy/crawler.py
scrapy/crawler.py
+13
-2
tests/test_commands.py
tests/test_commands.py
+36
-21
未找到文件。
scrapy/crawler.py
浏览文件 @
10bcdb49
...
...
@@ -3,6 +3,7 @@ import signal
import
logging
import
warnings
import
sys
from
twisted.internet
import
reactor
,
defer
from
zope.interface.verify
import
verifyClass
,
DoesNotImplement
...
...
@@ -73,11 +74,21 @@ class Crawler(object):
yield
self
.
engine
.
open_spider
(
self
.
spider
,
start_requests
)
yield
defer
.
maybeDeferred
(
self
.
engine
.
start
)
except
Exception
:
exc
=
defer
.
fail
()
# In Python 2 reraising an exception after yield discards
# the original traceback (see http://bugs.python.org/issue7563),
# so sys.exc_info() workaround is used.
# This workaround also works in Python 3, but it is not needed,
# and it is slower, so in Python 3 we use native `raise`.
if
six
.
PY2
:
exc_info
=
sys
.
exc_info
()
self
.
crawling
=
False
if
self
.
engine
is
not
None
:
yield
self
.
engine
.
close
()
yield
exc
if
six
.
PY2
:
six
.
reraise
(
*
exc_info
)
raise
def
_create_spider
(
self
,
*
args
,
**
kwargs
):
return
self
.
spidercls
.
from_crawler
(
self
,
*
args
,
**
kwargs
)
...
...
tests/test_commands.py
浏览文件 @
10bcdb49
...
...
@@ -6,7 +6,7 @@ from time import sleep
from
os.path
import
exists
,
join
,
abspath
from
shutil
import
rmtree
,
copytree
from
tempfile
import
mkdtemp
import
six
from
contextlib
import
contextmanager
from
twisted.trial
import
unittest
from
twisted.internet
import
defer
...
...
@@ -154,12 +154,24 @@ class MiscCommandsTest(CommandTest):
class
RunSpiderCommandTest
(
CommandTest
):
def
test_runspider
(
self
):
@
contextmanager
def
_create_file
(
self
,
content
,
name
):
tmpdir
=
self
.
mktemp
()
os
.
mkdir
(
tmpdir
)
fname
=
abspath
(
join
(
tmpdir
,
'myspider.py'
))
fname
=
abspath
(
join
(
tmpdir
,
name
))
with
open
(
fname
,
'w'
)
as
f
:
f
.
write
(
"""
f
.
write
(
content
)
try
:
yield
fname
finally
:
rmtree
(
tmpdir
)
def
runspider
(
self
,
code
,
name
=
'myspider.py'
):
with
self
.
_create_file
(
code
,
name
)
as
fname
:
return
self
.
proc
(
'runspider'
,
fname
)
def
test_runspider
(
self
):
spider
=
"""
import scrapy
class MySpider(scrapy.Spider):
...
...
@@ -168,23 +180,17 @@ class MySpider(scrapy.Spider):
def start_requests(self):
self.logger.debug("It Works!")
return []
"""
)
p
=
self
.
proc
(
'runspider'
,
fname
)
"""
p
=
self
.
runspider
(
spider
)
log
=
to_native_str
(
p
.
stderr
.
read
())
self
.
assertIn
(
"DEBUG: It Works!"
,
log
)
self
.
assertIn
(
"INFO: Spider opened"
,
log
)
self
.
assertIn
(
"INFO: Closing spider (finished)"
,
log
)
self
.
assertIn
(
"INFO: Spider closed (finished)"
,
log
)
def
test_runspider_no_spider_found
(
self
):
tmpdir
=
self
.
mktemp
()
os
.
mkdir
(
tmpdir
)
fname
=
abspath
(
join
(
tmpdir
,
'myspider.py'
))
with
open
(
fname
,
'w'
)
as
f
:
f
.
write
(
"""
from scrapy.spiders import Spider
"""
)
p
=
self
.
proc
(
'runspider'
,
fname
)
p
=
self
.
runspider
(
"from scrapy.spiders import Spider
\n
"
)
log
=
to_native_str
(
p
.
stderr
.
read
())
self
.
assertIn
(
"No spider found in file"
,
log
)
...
...
@@ -194,14 +200,23 @@ from scrapy.spiders import Spider
self
.
assertIn
(
"File not found: some_non_existent_file"
,
log
)
def
test_runspider_unable_to_load
(
self
):
tmpdir
=
self
.
mktemp
()
os
.
mkdir
(
tmpdir
)
fname
=
abspath
(
join
(
tmpdir
,
'myspider.txt'
))
with
open
(
fname
,
'w'
)
as
f
:
f
.
write
(
""
)
p
=
self
.
proc
(
'runspider'
,
fname
)
p
=
self
.
runspider
(
''
,
'myspider.txt'
)
log
=
to_native_str
(
p
.
stderr
.
read
())
self
.
assertIn
(
'Unable to load'
,
log
)
def
test_start_requests_errors
(
self
):
p
=
self
.
runspider
(
"""
import scrapy
class BadSpider(scrapy.Spider):
name = "bad"
def start_requests(self):
raise Exception("oops!")
"""
,
name
=
"badspider.py"
)
log
=
to_native_str
(
p
.
stderr
.
read
())
self
.
assertIn
(
"Unable to load"
,
log
)
print
(
log
)
self
.
assertIn
(
"start_requests"
,
log
)
self
.
assertIn
(
"badspider.py"
,
log
)
class
ParseCommandTest
(
ProcessTest
,
SiteTest
,
CommandTest
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录