diff --git a/controllers/cms.py b/controllers/cms.py index 04c4dada09e5b668d7a1d5790bcfc09f2ee5ea0b..a82e5a02dfe165755ad5ac430813360ffd2220bd 100644 --- a/controllers/cms.py +++ b/controllers/cms.py @@ -26,7 +26,7 @@ py_ctx = { 'requests':requests,'print':print,'base64Encode':base64Encode,'baseDecode':baseDecode, 'log':logger.info,'fetch':fetch,'post':post,'request':request,'getCryptoJS':getCryptoJS, 'buildUrl':buildUrl,'getHome':getHome,'setDetail':setDetail,'join':join,'urljoin2':urljoin2, -'PC_UA':PC_UA,'MOBILE_UA':MOBILE_UA,'UC_UA':UC_UA +'PC_UA':PC_UA,'MOBILE_UA':MOBILE_UA,'UC_UA':UC_UA,'IOS_UA':IOS_UA } # print(getCryptoJS()) @@ -104,6 +104,8 @@ class CMS: headers[k] = PC_UA elif v == 'UC_UA': headers[k] = UC_UA + elif v == 'IOS_UA': + headers[k] = IOS_UA lower_keys = list(map(lambda x:x.lower(),keys)) if not 'user-agent' in lower_keys: headers['User-Agent'] = UA @@ -302,6 +304,14 @@ class CMS: else: return '' + def dealJson(self,html): + try: + res = re.search('.*?{(.*)}',html,re.M|re.I).groups()[0] + html = '{' + res + '}' + return html + except: + return html + def checkHtml(self,r): r.encoding = self.encoding html = r.text @@ -474,6 +484,8 @@ class CMS: return self.blank() jsp = jsoup(self.homeUrl) is_json = str(p[0]).startswith('json:') + if is_json: + html = self.dealJson(html) pdfh = jsp.pjfh if is_json else jsp.pdfh pdfa = jsp.pjfa if is_json else jsp.pdfa pd = jsp.pj if is_json else jsp.pd @@ -481,12 +493,17 @@ class CMS: try: if self.double: items = pdfa(html, p[0]) + # print(items) for item in items: items2 = pdfa(item,p[1]) + # print(items2) for item2 in items2: try: title = pdfh(item2, p[2]) - img = pd(item2, p[3]) + try: + img = pd(item2, p[3]) + except: + img = '' desc = pdfh(item2, p[4]) links = [pd(item2, p5) if not self.detailUrl else pdfh(item2, p5) for p5 in p[5].split('+')] link = '$'.join(links) @@ -617,6 +634,7 @@ class CMS: r = requests.get(url, headers=self.headers, timeout=self.timeout) html = self.checkHtml(r) if is_json: + html = self.dealJson(html) html = json.loads(html) # print(html) items = pdfa(html,p[0].replace('json:','',1)) @@ -659,8 +677,10 @@ class CMS: def detailOneVod(self,id,fyclass=''): detailUrl = str(id) vod = {} - if not detailUrl.startswith('http'): + if not detailUrl.startswith('http') and not '/' in detailUrl: url = self.detailUrl.replace('fyid', detailUrl).replace('fyclass',fyclass) + elif '/' in detailUrl: + url = urljoin(self.homeUrl,detailUrl) else: url = detailUrl print(url) @@ -671,8 +691,8 @@ class CMS: vod['vod_play_from'] = '道长在线' vod['vod_remarks'] = detailUrl vod['vod_actor'] = '没有二级,只有一级链接直接嗅探播放' - vod['vod_content'] = detailUrl - vod['vod_play_url'] = '嗅探播放$'+self.play_url+detailUrl + vod['vod_content'] = url + vod['vod_play_url'] = '嗅探播放$'+self.play_url+url print(vod) return vod @@ -718,6 +738,7 @@ class CMS: r = requests.get(url, headers=self.headers, timeout=self.timeout) html = self.checkHtml(r) if is_json: + html = self.dealJson(html) html = json.loads(html) if p.get('title'): p1 = p['title'].split(';') @@ -867,7 +888,7 @@ class CMS: 'list': [] } logger.info(f'{self.getName()}获取详情页耗时:{get_interval(t1)}毫秒,发生错误:{e}') - print(result) + # print(result) return result def searchContent(self, key, fypage=1): @@ -919,6 +940,7 @@ class CMS: r = requests.get(url, headers=self.headers,timeout=self.timeout) html = self.checkHtml(r) if is_json: + html = self.dealJson(html) html = json.loads(html) # print(html) if not is_json and html.find('输入验证码') > -1: @@ -1008,6 +1030,7 @@ class CMS: play_url = lazy_url else: jscode = str(self.lazy).split('js:')[1] + jsp = jsoup(self.url) # jscode = f'var input={play_url};{jscode}' # print(jscode) headers['Referer'] = getHome(play_url) @@ -1018,6 +1041,7 @@ class CMS: 'jxs':jxs, 'getParse':self.d.getParse, 'saveParse':self.d.saveParse, + 'jsp': jsp, 'pdfh': self.d.jsp.pdfh, 'pdfa': self.d.jsp.pdfa, 'pd': self.d.jsp.pd, }) diff --git a/js/LIBVIO.js b/js/LIBVIO.js index 10efee01ac5ecaf3236464a95f93c0bc22473314..9e6d9726d6866761f6be646782e62c1b48f23f50 100644 --- a/js/LIBVIO.js +++ b/js/LIBVIO.js @@ -1,6 +1,7 @@ var rule = Object.assign(muban.首图2,{ title:'LIBVIO', -host:'https://www.libvio.me', +// host:'https://www.libvio.me', +host:'https://www.libvio.fun', url:'/type/fyclass-fypage.html', class_parse:'.stui-header__menu li:gt(0):lt(7);a&&Text;a&&href;/(\\d+).html', searchUrl:'/search/**----------fypage---.html', diff --git a/js/version.txt b/js/version.txt index 5141b615b4b2331874318b0dbdcb315ec6a77922..3ec370e154b23d02b25cd92d09634cee2ae75c04 100644 --- a/js/version.txt +++ b/js/version.txt @@ -1 +1 @@ -3.4.4 \ No newline at end of file +3.4.5 \ No newline at end of file diff --git "a/js/\345\205\224\345\260\217\350\264\235.js" "b/js/\345\205\224\345\260\217\350\264\235.js" new file mode 100644 index 0000000000000000000000000000000000000000..4ef0dac26e9c977e2d5a5a3fc889352e2049b3f2 --- /dev/null +++ "b/js/\345\205\224\345\260\217\350\264\235.js" @@ -0,0 +1,27 @@ +var rule = { + title:'兔小贝', + host:'https://www.tuxiaobei.com', + homeUrl:'', + url:'/list/mip-data?typeId=fyclass&page=fypage&callback=', + detailUrl:'/play/fyid', + searchUrl:'/search/index?key=**', + headers:{ + 'User-Agent':'MOBILE_UA' + }, + timeout:5000, + class_url:'2&3&4&25', + class_name:'儿歌&故事&国学&启蒙', + //class_name:'#page-viewport&&ul&&li;.text&&Text;a&&href;/(.*)', + cate_exclude:'应用', + 推荐:'.pic-list.list-box;.items;.text&&Text;mip-img&&src;.all&&Text;a&&href', + double:true, + limit:5, + play_parse:true, + lazy:'js:fetch_params.headers["user-agent"] = IOS_UA;let html=fetch(input,fetch_params);let src = jsp.pdfh(html,"body&&#videoWrap&&video-src");input=src;', + // 一级:'json:data.items;name;image;collect_num;category_id+video_id', + 一级:'json:data.items;name;image;duration_string;video_id', + 二级:'*', + 搜索:'.list-con&&.items;.text&&Text;mip-img&&src;.time&&Text;a&&href', + searchable:1, + quickSearch:0, +} \ No newline at end of file diff --git a/readme.md b/readme.md index a2699b27cb66724bc65d89c6556191638bc12401..832b2fd07ddd9aece615a79d9e08ff68fbe60d1f 100644 --- a/readme.md +++ b/readme.md @@ -48,6 +48,7 @@ [获取本地设备信息](https://m.jb51.net/article/140716.htm) ###### 2022/09/10 - [X] 1.升级至3.4.4.增加小强迷源,增加二级重定向属性(提供重定向后的源码,让代码重新取重定向过后的线路和播放列表) +- [X] 1.升级至3.4.5.增加兔小贝儿歌源,优化json:细节处理以及详情页拼接细节 ###### 2022/09/09 - [X] 1.增加西瓜源,修复一级不支持lazy的bug - [X] 2.兄弟们dockerhub没法push镜像不知道咋回事,3.4.1的镜像自己用docker目录下的文件build吧 diff --git a/utils/encode.py b/utils/encode.py index 31b6b4c59af68c8ba18540d6bd79c639b6d31987..49c72ad3714547d7865c029a32aae23d0e570b84 100644 --- a/utils/encode.py +++ b/utils/encode.py @@ -174,7 +174,7 @@ def base_request(url,obj): method = 'get' obj['method'] = 'method' # print(obj) - print(f'{method}:{url}') + print(f"{method}:{url}:{obj['headers']}") try: # r = requests.get(url, headers=headers, params=body, timeout=timeout) if method.lower() == 'get': @@ -194,7 +194,7 @@ def base_request(url,obj): def fetch(url,obj): obj = dealObj(obj) if not obj.get('headers') or not obj['headers'].get('User-Agent'): - obj['headers']['User-Agent'] = PC_UA + obj['headers']['User-Agent'] = obj['headers'].get('user-agent',PC_UA) return base_request(url,obj) def post(url,obj): @@ -206,7 +206,7 @@ def request(url,obj): obj = dealObj(obj) # print(f'{method}:{url}') if not obj.get('headers') or not obj['headers'].get('User-Agent'): - obj['headers']['User-Agent'] = UC_UA + obj['headers']['User-Agent'] = obj['headers'].get('user-agent',UC_UA) return base_request(url, obj) diff --git a/utils/htmlParser.py b/utils/htmlParser.py index c245a4e511b11a17a5c812474a125778f4a8398a..339b8e202d20f4c820e01ddfbd06b361219d066c 100644 --- a/utils/htmlParser.py +++ b/utils/htmlParser.py @@ -28,13 +28,13 @@ class jsoup: option = parse.split('&&')[-1] parse = parse.split('&&')[:-1] # 如果只有一个&& 取的就直接是0 if len(parse) > 1: # 如果不大于1可能就是option操作,不需要拼eq - parse = ' '.join([i if self.test(':eq|:lt|:gt',i) else f'{i}:eq(0)' for i in parse]) + parse = ' '.join([i if self.test(':eq|:lt|:gt|#',i) else f'{i}:eq(0)' for i in parse]) else: - parse = parse[0] if self.test(':eq|:lt|:gt',parse[0]) else f'{parse[0]}:eq(0)' - + parse = parse[0] if self.test(':eq|:lt|:gt|#',parse[0]) else f'{parse[0]}:eq(0)' if option: # print(f'parse:{parse}=>(option:{option})') ret = doc(parse) + # print(html) # FIXME 解析出来有多个的情况应该自动取第一个 if option == 'Text': ret = ret.text() diff --git a/utils/update.py b/utils/update.py index 1ce6bfb5383ac9a07b368b184612abbde70887d7..45e32d1d1bf1b89554425fbe608cbdcd44a78786 100644 --- a/utils/update.py +++ b/utils/update.py @@ -96,6 +96,7 @@ def copy_to_update(): # print(f'升级失败,找不到目录{dr_path}') logger.info(f'升级失败,找不到目录{dr_path}') return False + # 千万不能覆盖super,base paths = ['js','models','controllers','libs','static','templates','utils','txt'] for path in paths: force_copy_files(os.path.join(dr_path, path),os.path.join(base_path, path)) diff --git a/utils/web.py b/utils/web.py index 508fe58909cae0a1f488ac6853be60e9424c66b0..6a3c5bfabd10fbde66f46dcfcc34aa66344890a5 100644 --- a/utils/web.py +++ b/utils/web.py @@ -14,6 +14,7 @@ MOBILE_UA = 'Mozilla/5.0 (Linux; Android 11; M2007J3SC Build/RKQ1.200826.002; wv PC_UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36' UA = 'Mozilla/5.0' UC_UA = 'Mozilla/5.0 (Linux; U; Android 9; zh-CN; MI 9 Build/PKQ1.181121.001) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.108 UCBrowser/12.5.5.1035 Mobile Safari/537.36' +IOS_UA = 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1' headers = { 'Referer': 'https://www.baidu.com', 'user-agent': UA,