提交 b6334561 编写于 作者: H hjdhnx

测试了另外的parsel解析库

上级 c2a16471
......@@ -13,6 +13,9 @@ assert subprocess.call(
shell=True,
cwd=DIRNAME) == 0, 'Could not link required node_modules'
另外一个很强的 CSS 解析库:parsel(性能待验证)
https://cuiqingcai.com/202232.html
远程直播地址:
"lives":[{"group":"redirect","channels":[{"name":"直播","urls":["proxy://do=live&type=txt&ext=aHR0cHM6Ly9hZ2l0LmFpL2xjeC8xMS9yYXcvYnJhbmNoL21hc3Rlci9saXZl"]}]}],
下面格式原版tv_box才能用?
......
gevent-websocket
PyExecJS
ddddocr
\ No newline at end of file
ddddocr
parsel # 不知道对比pyquery性能如何,也是css选择器,不好封装成pdfa
\ No newline at end of file
......@@ -68,4 +68,29 @@ class jsoup:
return self.pdfh(html,parse,True)
# Thin wrapper: passes *html* to the `pq` callable resolved from the enclosing
# scope and returns its result unchanged. `self` is not used.
# NOTE(review): `pq` is presumably pyquery's PyQuery alias -- its import is not
# visible in this hunk; confirm against the top of the file.
def pq(self,html):
return pq(html)
\ No newline at end of file
return pq(html)
if __name__ == '__main__':
    # Ad-hoc experiment: try parsel as an alternative CSS-selector backend for
    # a pdfa-style "select a list of nodes" helper.
    import requests
    from parsel import Selector

    url = 'http://360yy.cn'
    jsp = jsoup(url)

    def pdfa2(html, parse):
        """Select nodes from *html* with parsel using a ``&&``-separated rule.

        Args:
            html: Markup text handed to ``parsel.Selector``.
            parse: Selector rule. ``&&`` separates steps, which are re-joined
                with a space (CSS descendant combinator).

        Returns:
            A list with the serialized markup of every matched node, or an
            empty list when *parse* is empty.
        """
        if not parse:
            return []
        if '&&' in parse:
            steps = parse.split('&&')
            last = len(steps) - 1
            # Every step except the last gets ``:nth-child(1)`` appended unless
            # ``jsp.test`` reports that it already carries :eq / :lt / :gt.
            # NOTE(review): :eq/:lt/:gt are jQuery-style pseudo-classes;
            # confirm that parsel's CSS translator accepts them.
            parse = ' '.join(
                step if jsp.test(':eq|:lt|:gt', step) or i >= last
                else f'{step}:nth-child(1)'
                for i, step in enumerate(steps)
            )
        selector = Selector(text=html)
        print(parse)
        items = selector.css(parse)
        # ``.get()`` is the documented way to obtain a node's markup; ``str()``
        # on a Selector may yield a truncated debug repr on some versions.
        return [item.get() for item in items]

    # A timeout keeps the demo from hanging forever on an unresponsive host.
    r = requests.get(url, timeout=10)
    html = r.text
    # parsel is awkward here: hard to wrap into pdfa-like helper functions.
    items = pdfa2(html, '.fed-pops-navbar&&ul.fed-part-rows&&a')
    print(items)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册