提交 62a62610 作者: Jana Cavojska

Issues a warning when the user puts a URL into allowed_domains (#2250)

上级 b8870ee8
......@@ -52,6 +52,10 @@ class OffsiteMiddleware(object):
allowed_domains = getattr(spider, 'allowed_domains', None)
if not allowed_domains:
return re.compile('') # allow all by default
for domainIndex in range(0, len(allowed_domains)):
url_pattern = re.compile("^https?://.*$")
if url_pattern.match(allowed_domains[domainIndex]):
logger.warn("allowed_domains accepts only domains, not URLs. Ignoring URL entry %s in allowed_domains." % allowed_domains[domainIndex])
regex = r'^(.*\.)?(%s)$' % '|'.join(re.escape(d) for d in allowed_domains if d is not None)
return re.compile(regex)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册