diff --git a/base/files/custom_spider_xyq.jar b/base/files/custom_spider_xyq.jar index f5f673b7bc9c380bfc84f03f9d6ef15fbef8ba58..82e170ed0bdbdddf11bf924e3402f3a2a62fca8e 100644 Binary files a/base/files/custom_spider_xyq.jar and b/base/files/custom_spider_xyq.jar differ diff --git a/base/rules.db b/base/rules.db index 7606596f53e0719f6144449a038d015c39527dad..70f6902eed6d11261a04246ce8d6d1046c9229f4 100644 Binary files a/base/rules.db and b/base/rules.db differ diff --git a/controllers/cms.py b/controllers/cms.py index c59ed66b7b99f190cbc3e1fa1a7c8aa8ed9a5332..94f3b6b5a8dc3f4dae0a244d4d37c75263dabc3f 100644 --- a/controllers/cms.py +++ b/controllers/cms.py @@ -8,7 +8,6 @@ import json import requests import re import math - import ujson from utils.web import * @@ -803,6 +802,7 @@ class CMS: else: p = p.split(';') # 解析 # print(len(p)) + # print(p) if len(p) < 5: return self.blank() @@ -862,7 +862,7 @@ class CMS: for video in videos: if video.get('vod_pic','') and str(video['vod_pic']).startswith('http'): video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}" - print(videos) + print('videos:',videos) limit = 40 cnt = 9999 if len(videos) > 0 else 0 result['list'] = videos @@ -923,6 +923,8 @@ class CMS: if is_json: html = self.dealJson(html) html = json.loads(html) + + tt1 = time() if p.get('title'): p1 = p['title'].split(';') vod['vod_name'] = pdfh(html, p1[0]).replace('\n', ' ').strip() @@ -1090,12 +1092,14 @@ class CMS: vod_tab_list.append(vlist) vod_play_url = vod_play_url.join(vod_tab_list) - vod_play_url_str = vod_play_url[:min(len(vod_play_url),200)] + vod_play_url_str = vod_play_url[:min(len(vod_play_url),500)] print(vod_play_url_str) vod['vod_play_from'] = vod_play_from # print(vod_play_from) vod['vod_play_url'] = vod_play_url + logger.info(f'{self.getName()}仅二级渲染{len(vod_play_url.split("$$$")[0].split("$"))}集耗时:{get_interval(tt1)}毫秒,共计{round(len(str(vod)) / 1000, 2)} kb') + if show_name: vod['vod_content'] = f'({self.id}){vod.get("vod_content", "")}' return vod 
@@ -1196,6 +1200,9 @@ class CMS: return result def searchContent(self, key, fypage=1,show_name=False): + if self.encoding and str(self.encoding).startswith('gb'): + key = quote(key.encode('utf-8').decode('utf-8').encode(self.encoding,'ignore')) + # print(key) pg = str(fypage) if not self.searchUrl: return self.blank() diff --git "a/jiexi/\346\217\222\350\220\235\350\216\211\346\205\242.js" "b/jiexi/\346\217\222\350\220\235\350\216\211\346\205\242.js" new file mode 100644 index 0000000000000000000000000000000000000000..b75bfe29c63e4d480557fc01b6b9f74e142d3c3b --- /dev/null +++ "b/jiexi/\346\217\222\350\220\235\350\216\211\346\205\242.js" @@ -0,0 +1,12 @@ +let jxUrl = 'http://chaloli.cn/home/api?type=ys&uid=1&key=ekloswzABCGHKLOT58&url='; +fetch_params.headers.Referer = jxUrl; +try { + // realUrl = null; + let html = request(jxUrl+vipUrl); + // log(html); + realUrl = jsp.pjfh(html,'$..url'); + log('解析到真实播放地址:'+realUrl); +}catch (e) { + log('解析发生错误:'+e.message); + realUrl = vipUrl; +} \ No newline at end of file diff --git "a/jiexi/\346\261\237\346\271\226.js" "b/jiexi/\346\261\237\346\271\226.js" index 5e15863ec6db147ccdfc75acd228a06cea0e4838..36ea7780126994fc336ebf154455a6b056298e91 100644 --- "a/jiexi/\346\261\237\346\271\226.js" +++ "b/jiexi/\346\261\237\346\271\226.js" @@ -1,5 +1,6 @@ // realUrl = 重定向('http://211.99.99.236:4567/jhjson/ceshi.php?url='+vipUrl); -let jxUrl = 'http://211.99.99.236:4567/jhjson/ceshi.php?url='; +// let jxUrl = 'http://211.99.99.236:4567/jhjson/ceshi.php?url='; +let jxUrl = 'http://jx.vipmv.co/json.php?token=123457&url='; fetch_params.headers.Referer = jxUrl; try { // realUrl = null; diff --git a/js/custom_spider.jar b/js/custom_spider.jar index f5f673b7bc9c380bfc84f03f9d6ef15fbef8ba58..82e170ed0bdbdddf11bf924e3402f3a2a62fca8e 100644 Binary files a/js/custom_spider.jar and b/js/custom_spider.jar differ diff --git a/js/version.txt b/js/version.txt index 1b5e1f5f94c1f3db09c5814af7dd4a94b770e1f4..c214c2edf188c33a0a6cc78657d6fdfc444dda63 
100644 --- a/js/version.txt +++ b/js/version.txt @@ -1 +1 @@ -3.9.20beta4 \ No newline at end of file +3.9.20beta5 \ No newline at end of file diff --git a/readme.md b/readme.md index ed6896e917c75dc78aee3e8fd394061fdb1c2557..7ed9bf075d4d7b6f460618f53e0b0082ad6cc58c 100644 --- a/readme.md +++ b/readme.md @@ -47,6 +47,8 @@ [dockerfile教程](https://blog.csdn.net/qq_46158060/article/details/125718218) [获取本地设备信息](https://blog.csdn.net/cui_yonghua/article/details/125508991) [获取本地设备信息](https://m.jb51.net/article/140716.htm) +###### 2022/11/08 +- [X] 海盗听书js0空白是触发了网页的cloudflare5秒盾了,暂时无解 ###### 2022/11/04 - [X] 增加了 /lives?path=txt/lives/18fm.txt 和 /lives?path=txt/lives/月光.txt ###### 2022/11/03 diff --git a/utils/htmlParser.py b/utils/htmlParser.py index c709b88d65cc8a41b9b06e369541216c9f1bd33e..6ae0a09882615ce0894f0f2bbbf486241914069f 100644 --- a/utils/htmlParser.py +++ b/utils/htmlParser.py @@ -11,9 +11,16 @@ from urllib.parse import urljoin import re from jsonpath import jsonpath +PARSE_CACHE = True # 解析缓存 + class jsoup: def __init__(self,MY_URL=''): self.MY_URL = MY_URL + self.pdfh_html = '' + self.pdfa_html = '' + + self.pdfh_doc = None + self.pdfa_doc = None def test(self, text:str, string:str): searchObj = re.search(rf'{text}', string, re.M | re.I) @@ -23,7 +30,13 @@ class jsoup: def pdfh(self,html,parse:str,add_url=False): if not parse: return '' - doc = pq(html) + if PARSE_CACHE: + if self.pdfh_html != html: + self.pdfh_html = html + self.pdfh_doc = pq(html) + doc = self.pdfh_doc + else: + doc = pq(html) if parse == 'body&&Text' or parse == 'Text': text = doc.text() return text @@ -92,8 +105,15 @@ class jsoup: parse = parse.split('&&') # 带&&的重新拼接 # print(f"{parse[0]},{self.test(':eq|:lt|:gt', parse[0])}") parse = ' '.join([parse[i] if self.test(':eq|:lt|:gt', parse[i]) or i>=len(parse)-1 else f'{parse[i]}:eq(0)' for i in range(len(parse))]) - # print(f'pdfa:{parse}') + print(f'pdfa:{parse}') + # print(html) + if PARSE_CACHE: + if self.pdfa_html != 
html: + self.pdfa_html = html + self.pdfa_doc = pq(html) + doc = self.pdfa_doc + else: + doc = pq(html) result = doc(parse) # 节点转字符串 # print(str(etree.tostring(result[0], pretty_print=True), 'utf-8'))