提交 454d5e57 编写于 作者: J Jana Cavojska

checking for subclass of URLWarning instead of checking error message text...

Check that the warning category is a subclass of URLWarning, instead of checking the error message text, when a URL is present in allowed_domains.
上级 8ec3b476
......@@ -56,10 +56,15 @@ class OffsiteMiddleware(object):
for domain in allowed_domains:
url_pattern = re.compile("^https?://.*$")
if url_pattern.match(domain):
warnings.warn("allowed_domains accepts only domains, not URLs. Ignoring URL entry %s in allowed_domains." % domain, Warning)
warnings.warn("allowed_domains accepts only domains, not URLs. Ignoring URL entry %s in allowed_domains." % domain, URLWarning)
regex = r'^(.*\.)?(%s)$' % '|'.join(re.escape(d) for d in allowed_domains if d is not None)
return re.compile(regex)
def spider_opened(self, spider):
    """Set up the middleware's state for ``spider``.

    Stores the result of ``self.get_host_regex(spider)`` on
    ``self.host_regex`` and resets ``self.domains_seen`` to an empty set.
    """
    host_pattern = self.get_host_regex(spider)
    self.host_regex = host_pattern
    # Start with no recorded domains.
    self.domains_seen = set()
class URLWarning(Warning):
    """Warning category used when an ``allowed_domains`` entry is a URL.

    Having a dedicated category lets callers filter or assert on the
    warning type instead of matching the message text.
    """
\ No newline at end of file
......@@ -5,6 +5,7 @@ from six.moves.urllib.parse import urlparse
from scrapy.http import Response, Request
from scrapy.spiders import Spider
from scrapy.spidermiddlewares.offsite import OffsiteMiddleware
from scrapy.spidermiddlewares.offsite import URLWarning
from scrapy.utils.test import get_crawler
import warnings
......@@ -78,4 +79,4 @@ class TestOffsiteMiddleware5(TestOffsiteMiddleware4):
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
self.mw.get_host_regex(self.spider)
assert "allowed_domains accepts only domains, not URLs." in str(w[-1].message)
assert issubclass(w[-1].category, URLWarning)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册