提交 5246986d 编写于 作者: D Daniel Graña

Merge pull request #693 from redapple/domain-casesensitivity

Support case-insensitive domains in url_is_from_any_domain()
......@@ -17,6 +17,10 @@ class UrlUtilsTest(unittest.TestCase):
self.assertTrue(url_is_from_any_domain(url, ['wheele-bin-art.co.uk']))
self.assertFalse(url_is_from_any_domain(url, ['art.co.uk']))
url = 'http://www.Wheele-Bin-Art.co.uk/get/product/123'
self.assertTrue(url_is_from_any_domain(url, ['wheele-bin-art.CO.UK']))
self.assertTrue(url_is_from_any_domain(url, ['WHEELE-BIN-ART.CO.UK']))
url = 'http://192.169.0.15:8080/mypage.html'
self.assertTrue(url_is_from_any_domain(url, ['192.169.0.15:8080']))
self.assertFalse(url_is_from_any_domain(url, ['192.169.0.15']))
......
......@@ -17,10 +17,10 @@ from scrapy.utils.python import unicode_to_str
def url_is_from_any_domain(url, domains):
    """Return True if the url belongs to any of the given domains.

    The comparison is case-insensitive: the URL's network location and each
    candidate domain are lower-cased before being compared. The URL matches
    a domain when its netloc is equal to that domain or ends with
    ``'.' + domain`` (i.e. it is a subdomain of it).

    The netloc is compared as a whole, so a URL carrying an explicit port
    (``192.169.0.15:8080``) only matches a domain spelled with the same port.

    Returns False when the URL has no netloc.
    """
    host = parse_url(url).netloc.lower()
    if not host:
        return False
    for domain in domains:
        # Normalise once per domain instead of once per comparison.
        domain = domain.lower()
        if host == domain or host.endswith('.%s' % domain):
            return True
    return False
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册