提交 db1394dc 编写于 作者: H hjdhnx

测试案例完美通过

上级 72081f7e
3.7.6
\ No newline at end of file
3.7.5beta10
\ No newline at end of file
......@@ -29,8 +29,10 @@ def main():
print(a)
a = jsp.pdfh(lis[0], 'a:eq(1) li img')
print(a)
a = jsp.pd(lis[0], 'a&&li&&img&&src')
print(a)
a = jsp.pd(lis[0], 'a:eq(1)&&li&&img&&src')
print('src:',a)
a = jsp.pd(lis[0], 'a&&href')
print('href:', a)
def main1():
url = 'https://www.lanhua.tv/voddetail/7420.html'
......@@ -89,7 +91,7 @@ def main3():
print(a)
if __name__ == '__main__':
# main()
main()
# main1()
# main2()
main3()
\ No newline at end of file
# main3()
\ No newline at end of file
......@@ -160,7 +160,7 @@ class jsoup:
res = [item.outerHtml() for item in ret.items()]
return res
def pdfh(self, html, parse: str, add_url=False):
def pdfh(self, html, parse: str, add_url=False, base_url: str = ''):
if not all([html, parse]):
return ''
if PARSE_CACHE:
......@@ -210,13 +210,15 @@ class jsoup:
if 'http' in ret:
ret = ret[ret.find('http'):]
else:
ret = urljoin(self.MY_URL, ret)
if not base_url:
base_url = self.MY_URL
ret = urljoin(base_url, ret)
else:
ret = ret.outerHtml()
return ret
def pd(self, html, parse: str):
return self.pdfh(html, parse, True)
def pd(self, html, parse: str, base_url: str = ''):
return self.pdfh(html, parse, True, base_url)
def pq(self, html: str):
return pq(html)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册