提交 68f8ca07 编写于 作者: H hjdhnx

更新jar,优化js0解析性能,修复江湖解析

上级 7dac66f9
无法预览此类型文件
...@@ -8,7 +8,6 @@ import json ...@@ -8,7 +8,6 @@ import json
import requests import requests
import re import re
import math import math
import ujson import ujson
from utils.web import * from utils.web import *
...@@ -803,6 +802,7 @@ class CMS: ...@@ -803,6 +802,7 @@ class CMS:
else: else:
p = p.split(';') # 解析 p = p.split(';') # 解析
# print(len(p)) # print(len(p))
# print(p)
if len(p) < 5: if len(p) < 5:
return self.blank() return self.blank()
...@@ -862,7 +862,7 @@ class CMS: ...@@ -862,7 +862,7 @@ class CMS:
for video in videos: for video in videos:
if video.get('vod_pic','') and str(video['vod_pic']).startswith('http'): if video.get('vod_pic','') and str(video['vod_pic']).startswith('http'):
video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}" video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}"
print(videos) print('videos:',videos)
limit = 40 limit = 40
cnt = 9999 if len(videos) > 0 else 0 cnt = 9999 if len(videos) > 0 else 0
result['list'] = videos result['list'] = videos
...@@ -923,6 +923,8 @@ class CMS: ...@@ -923,6 +923,8 @@ class CMS:
if is_json: if is_json:
html = self.dealJson(html) html = self.dealJson(html)
html = json.loads(html) html = json.loads(html)
tt1 = time()
if p.get('title'): if p.get('title'):
p1 = p['title'].split(';') p1 = p['title'].split(';')
vod['vod_name'] = pdfh(html, p1[0]).replace('\n', ' ').strip() vod['vod_name'] = pdfh(html, p1[0]).replace('\n', ' ').strip()
...@@ -1090,12 +1092,14 @@ class CMS: ...@@ -1090,12 +1092,14 @@ class CMS:
vod_tab_list.append(vlist) vod_tab_list.append(vlist)
vod_play_url = vod_play_url.join(vod_tab_list) vod_play_url = vod_play_url.join(vod_tab_list)
vod_play_url_str = vod_play_url[:min(len(vod_play_url),200)] vod_play_url_str = vod_play_url[:min(len(vod_play_url),500)]
print(vod_play_url_str) print(vod_play_url_str)
vod['vod_play_from'] = vod_play_from vod['vod_play_from'] = vod_play_from
# print(vod_play_from) # print(vod_play_from)
vod['vod_play_url'] = vod_play_url vod['vod_play_url'] = vod_play_url
logger.info(f'{self.getName()}仅二级渲染{len(vod_play_url.split("$$$")[0].split("$"))}集耗时:{get_interval(tt1)}毫秒,共计{round(len(str(vod)) / 1000, 2)} kb')
if show_name: if show_name:
vod['vod_content'] = f'({self.id}){vod.get("vod_content", "")}' vod['vod_content'] = f'({self.id}){vod.get("vod_content", "")}'
return vod return vod
...@@ -1196,6 +1200,9 @@ class CMS: ...@@ -1196,6 +1200,9 @@ class CMS:
return result return result
def searchContent(self, key, fypage=1,show_name=False): def searchContent(self, key, fypage=1,show_name=False):
if self.encoding and str(self.encoding).startswith('gb'):
key = quote(key.encode('utf-8').decode('utf-8').encode(self.encoding,'ignore'))
# print(key)
pg = str(fypage) pg = str(fypage)
if not self.searchUrl: if not self.searchUrl:
return self.blank() return self.blank()
......
let jxUrl = 'http://chaloli.cn/home/api?type=ys&uid=1&key=ekloswzABCGHKLOT58&url=';
fetch_params.headers.Referer = jxUrl;
try {
// realUrl = null;
let html = request(jxUrl+vipUrl);
// log(html);
realUrl = jsp.pjfh(html,'$..url');
log('解析到真实播放地址:'+realUrl);
}catch (e) {
log('解析发生错误:'+e.message);
realUrl = vipUrl;
}
\ No newline at end of file
// realUrl = 重定向('http://211.99.99.236:4567/jhjson/ceshi.php?url='+vipUrl); // realUrl = 重定向('http://211.99.99.236:4567/jhjson/ceshi.php?url='+vipUrl);
let jxUrl = 'http://211.99.99.236:4567/jhjson/ceshi.php?url='; // let jxUrl = 'http://211.99.99.236:4567/jhjson/ceshi.php?url=';
let jxUrl = 'http://jx.vipmv.co/json.php?token=123457&url=';
fetch_params.headers.Referer = jxUrl; fetch_params.headers.Referer = jxUrl;
try { try {
// realUrl = null; // realUrl = null;
......
3.9.20beta4 3.9.20beta5
\ No newline at end of file \ No newline at end of file
...@@ -47,6 +47,8 @@ ...@@ -47,6 +47,8 @@
[dockerfile教程](https://blog.csdn.net/qq_46158060/article/details/125718218) [dockerfile教程](https://blog.csdn.net/qq_46158060/article/details/125718218)
[获取本地设备信息](https://blog.csdn.net/cui_yonghua/article/details/125508991) [获取本地设备信息](https://blog.csdn.net/cui_yonghua/article/details/125508991)
[获取本地设备信息](https://m.jb51.net/article/140716.htm) [获取本地设备信息](https://m.jb51.net/article/140716.htm)
###### 2022/11/08
- [X] 海盗听书js0空白是触发了网页的Cloudflare 5秒盾了,暂时无解
###### 2022/11/04 ###### 2022/11/04
- [X] 增加了 /lives?path=txt/lives/18fm.txt 和 /lives?path=txt/lives/月光.txt - [X] 增加了 /lives?path=txt/lives/18fm.txt 和 /lives?path=txt/lives/月光.txt
###### 2022/11/03 ###### 2022/11/03
......
...@@ -11,9 +11,16 @@ from urllib.parse import urljoin ...@@ -11,9 +11,16 @@ from urllib.parse import urljoin
import re import re
from jsonpath import jsonpath from jsonpath import jsonpath
PARSE_CACHE = True # 解析缓存
class jsoup: class jsoup:
def __init__(self,MY_URL=''): def __init__(self,MY_URL=''):
self.MY_URL = MY_URL self.MY_URL = MY_URL
self.pdfh_html = ''
self.pdfa_html = ''
self.pdfh_doc = None
self.pdfa_doc = None
def test(self, text:str, string:str): def test(self, text:str, string:str):
searchObj = re.search(rf'{text}', string, re.M | re.I) searchObj = re.search(rf'{text}', string, re.M | re.I)
...@@ -23,7 +30,13 @@ class jsoup: ...@@ -23,7 +30,13 @@ class jsoup:
def pdfh(self,html,parse:str,add_url=False): def pdfh(self,html,parse:str,add_url=False):
if not parse: if not parse:
return '' return ''
doc = pq(html) if PARSE_CACHE:
if self.pdfh_html != html:
self.pdfh_html = html
self.pdfh_doc = pq(html)
doc = self.pdfh_doc
else:
doc = pq(html)
if parse == 'body&&Text' or parse == 'Text': if parse == 'body&&Text' or parse == 'Text':
text = doc.text() text = doc.text()
return text return text
...@@ -92,8 +105,15 @@ class jsoup: ...@@ -92,8 +105,15 @@ class jsoup:
parse = parse.split('&&') # 带&&的重新拼接 parse = parse.split('&&') # 带&&的重新拼接
# print(f"{parse[0]},{self.test(':eq|:lt|:gt', parse[0])}") # print(f"{parse[0]},{self.test(':eq|:lt|:gt', parse[0])}")
parse = ' '.join([parse[i] if self.test(':eq|:lt|:gt', parse[i]) or i>=len(parse)-1 else f'{parse[i]}:eq(0)' for i in range(len(parse))]) parse = ' '.join([parse[i] if self.test(':eq|:lt|:gt', parse[i]) or i>=len(parse)-1 else f'{parse[i]}:eq(0)' for i in range(len(parse))])
# print(f'pdfa:{parse}') print(f'pdfa:{parse}')
doc = pq(html) # print(html)
if PARSE_CACHE:
if self.pdfa_html != html:
self.pdfa_html = html
self.pdfa_doc = pq(html)
doc = self.pdfa_doc
else:
doc = pq(html)
result = doc(parse) result = doc(parse)
# 节点转字符串 # 节点转字符串
# print(str(etree.tostring(result[0], pretty_print=True), 'utf-8')) # print(str(etree.tostring(result[0], pretty_print=True), 'utf-8'))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册