提交 201a16f6 编写于 作者: P Paul Tremberth 提交者: GitHub

Merge pull request #2394 from nyov/fix-le

[MRG+1] LinkExtractor PY3 'unicode' type fix
......@@ -3,6 +3,7 @@ HTMLParser-based link extractor
"""
import warnings
+import six
from six.moves.html_parser import HTMLParser
from six.moves.urllib.parse import urljoin
......@@ -39,7 +40,7 @@ class HtmlParserLinkExtractor(HTMLParser):
ret = []
base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
for link in links:
-if isinstance(link.url, unicode):
+if isinstance(link.url, six.text_type):
link.url = link.url.encode(response_encoding)
try:
link.url = urljoin(base_url, link.url)
......
"""
SGMLParser-based Link extractors
"""
+import six
from six.moves.urllib.parse import urljoin
import warnings
from sgmllib import SGMLParser
......@@ -40,7 +41,7 @@ class BaseSgmlLinkExtractor(SGMLParser):
if base_url is None:
base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
for link in self.links:
-if isinstance(link.url, unicode):
+if isinstance(link.url, six.text_type):
link.url = link.url.encode(response_encoding)
try:
link.url = urljoin(base_url, link.url)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册