diff --git "a/js/\347\216\251\345\201\266\345\247\220\345\247\220.js" "b/js/\347\216\251\345\201\266\345\247\220\345\247\220.js" index d166af42870b1dbdb1cd663565e9a7d3cd034687..eb178b1dfa2ea032789b1ca2fb690532e40f7770 100644 --- "a/js/\347\216\251\345\201\266\345\247\220\345\247\220.js" +++ "b/js/\347\216\251\345\201\266\345\247\220\345\247\220.js" @@ -2,13 +2,13 @@ var rule = { title:'玩偶姐姐', host:'https://hongkongdollvideo.com', homeUrl:'/latest/', - url:'/fyclass/fypage.html[/fypage/]', + url:'/fyclass/fypage.html[/fyclass/]', + headers:{ + 'User-Agent':'MOBILE_UA' + }, + timeout:5000, searchUrl:'/vodsearch/**----------fypage---.html', - ua:'MOBILE_UA', - class_name:'最新&Hongkong Doll&麻豆传媒&91制片厂&天美传媒&蜜桃传媒&皇家华人&星空传媒&精东影业&乐播传媒&成人头条&乌鸦传媒&兔子先生&杏吧原创&mini传媒&大象传媒&开心鬼传媒&PsychoPorn&糖心Vlog', - class_url:'latest&Hongkong Doll&麻豆传媒&91制片厂&天美传媒&蜜桃传媒&皇家华人&星空传媒&精东影业&乐播传媒&成人头条&乌鸦传媒&兔子先生&杏吧原创&mini传媒&大象传媒&开心鬼传媒&PsychoPorn&糖心Vlog', - class_parse:'body&&#side-menu:not(:has(.menu-icons))&&li;', + class_parse:'#side-menu:lt(1) li;a&&Text;a&&href;com/(.*?)/', 一级:'.col-sm-6;h3&&Text;img&&data-src;.date&&Text;a&&href', - 二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"}, - 搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text', + 二级:'*', } \ No newline at end of file diff --git a/models/cms.py b/models/cms.py index fcad664efe50202ebc9c4688ff2ee6e8de3c766c..973789e7b63e122b95ee91af65801db8fbe86cf1 100644 --- a/models/cms.py +++ b/models/cms.py @@ -4,7 +4,7 @@ # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ # Date : 2022/8/25 import requests - +import re from utils.web import * from utils.config import config from utils.htmlParser import jsoup @@ -12,19 +12,36 @@ from urllib.parse import urljoin class CMS: def __init__(self,rule): - self.host = rule.get('host','').rstrip('/') - self.homeUrl = rule.get('homeUrl','') - self.url = rule.get('url','').rstrip('/') - self.detailUrl = rule.get('detailUrl','').rstrip('/') - self.searchUrl = rule.get('searchUrl','') - ua = rule.get('ua','') - if ua == 'MOBILE_UA': - self.ua = MOBILE_UA - elif ua == 'PC_UA': - self.ua = PC_UA + host = rule.get('host','').rstrip('/') + timeout = rule.get('timeout',2000) + homeUrl = rule.get('homeUrl','') + url = rule.get('url','') + detailUrl = rule.get('detailUrl','') + searchUrl = rule.get('searchUrl','') + headers = rule.get('headers',{}) + keys = headers.keys() + for k in headers.keys(): + if str(k).lower() == 'user-agent': + v = headers[k] + if v == 'MOBILE_UA': + headers[k] = MOBILE_UA + elif v == 'PC_UA': + headers[k] = PC_UA + lower_keys = list(map(lambda x:x.lower(),keys)) + if not 'user-agent' in lower_keys: + headers['User-Agent'] = UA + self.headers = headers + self.host = host + self.homeUrl = urljoin(host,homeUrl) if host and homeUrl else homeUrl + if url.find('[') >-1 and url.find(']') > -1: + u1 = url.split('[')[0] + u2 = url.split('[')[1].split(']')[0] + self.url = urljoin(host,u1)+'['+urljoin(host,u2)+']' if host and url else url else: - self.ua = UA - self.searchUrl = rule.get('searchUrl','') + self.url = urljoin(host, url) if host and url else url + + self.detailUrl = urljoin(host,detailUrl) if host and detailUrl else detailUrl + self.searchUrl = urljoin(host,searchUrl) if host and searchUrl else searchUrl self.class_name = rule.get('class_name','') self.class_url = rule.get('class_url','') self.class_parse = rule.get('class_parse','') @@ -32,26 +49,98 @@ class CMS: self.二级 = rule.get('二级','') self.搜索 = rule.get('搜索','') self.title = rule.get('title','') + self.timeout = round(int(timeout)/1000,2) self.filter = rule.get('filter',[]) self.extend = rule.get('extend',[]) def getName(self): return self.title + def regexp(self,prule,text,pos=None): + ret = re.search(prule,text).groups() + if pos != None and isinstance(pos,int): + return ret[pos] + else: + return ret + + def test(self,text,string): + searchObj = re.search(rf'{text}', string, re.M | re.I) + # print(searchObj) + # global vflag + if searchObj: + # vflag = searchObj.group() + pass + return searchObj + + def blank(self): + result = { + 'list': [] + } + return result + + def blank_vod(self): + return { + "vod_id": "", + "vod_name": "", + "vod_pic": "", + "type_name": "", + "vod_year": "", + "vod_area": "", + "vod_remarks": "", + "vod_actor": "", + "vod_director": "", + "vod_content": "" + } + + def jsoup(self): + jsp = jsoup(self.url) + pdfh = jsp.pdfh + pdfa = jsp.pdfa + pd = jsp.pd + pq = jsp.pq + return pdfh,pdfa,pd,pq + def homeContent(self): # yanaifei # https://yanetflix.com/vodtype/dianying.html result = {} - class_names = self.class_name.split('&') - class_urls = self.class_url.split('&') - cnt = min(len(class_urls),len(class_names)) classes = [] - for i in range(cnt): - classes.append({ - 'type_name': class_names[i], - 'type_id': class_urls[i] - }) - print(self.url) + + if self.class_url and self.class_name: + class_names = self.class_name.split('&') + class_urls = self.class_url.split('&') + cnt = min(len(class_urls), len(class_names)) + for i in range(cnt): + classes.append({ + 'type_name': class_names[i], + 'type_id': class_urls[i] + }) + # print(self.url) + if self.homeUrl and self.class_parse: + # print(self.homeUrl) + # print(self.class_parse) + try: + r = requests.get(self.homeUrl,headers=self.headers,timeout=self.timeout) + html = r.text + p = self.class_parse.split(';') + jsp = jsoup(self.url) + pdfh = jsp.pdfh + pdfa = jsp.pdfa + pd = jsp.pd + items = pdfa(html,p[0]) + for item in items: + title = pdfh(item, p[1]) + url = pd(item, p[2]) + tag = url + if len(p) > 3 and p[3].strip(): + tag = self.regexp(p[3].strip(),url,0) + classes.append({ + 'type_name': title, + 'type_id': tag + }) + except Exception as e: + print(e) + result['class'] = classes if self.filter: result['filters'] = config['filter'] @@ -83,10 +172,14 @@ class CMS: # url = self.url + '/{0}.html'.format(params) pg = str(fypage) url = self.url.replace('fyclass',fyclass).replace('fypage',pg) - print(url) - headers = {'user-agent': self.ua} - r = requests.get(url, headers=headers) + if fypage == 1 and self.test('[\[\]]',url): + url = url.split('[')[1].split(']')[0] + r = requests.get(url, headers=self.headers,timeout=self.timeout) + print(r.url) p = self.一级.split(';') # 解析 + if len(p) < 5: + return self.blank() + jsp = jsoup(self.url) pdfh = jsp.pdfh pdfa = jsp.pdfa @@ -125,17 +218,34 @@ class CMS: :return: """ # video-info-header - fyid = str(array[0]) - if not fyid.startswith('http'): - url = self.detailUrl.replace('fyid', fyid) + detailUrl = str(array[0]) + print(detailUrl) + if not detailUrl.startswith('http'): + url = self.detailUrl.replace('fyid', detailUrl) else: - url = fyid + url = detailUrl print(url) - headers = {'user-agent': self.ua} - r = requests.get(url, headers=headers) + r = requests.get(url, headers=self.headers,timeout=self.timeout) html = r.text # print(html) p = self.二级 # 解析 + if p == '*': + vod = self.blank_vod() + vod['vod_play_from'] = '道长在线' + vod['desc'] = detailUrl + vod['vod_actor'] = '没有二级,只有一级链接直接嗅探播放' + vod['content'] = detailUrl + vod['vod_play_url'] = '嗅探播放$'+detailUrl + result = { + 'list': [ + vod + ] + } + return result + + if not isinstance(p,dict): + return self.blank() + jsp = jsoup(self.url) pdfh = jsp.pdfh pdfa = jsp.pdfa @@ -165,7 +275,7 @@ class CMS: obj['img'] = img vod = { - "vod_id": fyid, + "vod_id": detailUrl, "vod_name": vod_name, "vod_pic": obj.get('img',''), "type_name": obj.get('title',''), @@ -212,14 +322,18 @@ class CMS: def searchContent(self, key, fypage=1): pg = str(fypage) + if not self.searchUrl: + return self.blank() url = self.searchUrl.replace('**', key).replace('fypage',pg) - if not str(url).startswith('http'): - url = urljoin(self.url,url) print(url) - headers = {'user-agent': self.ua} - r = requests.get(url, headers=headers) + r = requests.get(url, headers=self.headers) html = r.text - p = self.搜索.split(';') # 解析 + if not self.搜索: + return self.blank() + p = self.一级.split(';') if self.搜索 == '*' and self.一级 else self.搜索.split(';') # 解析 + if len(p) < 5: + return self.blank() + jsp = jsoup(self.url) pdfh = jsp.pdfh pdfa = jsp.pdfa @@ -254,7 +368,9 @@ if __name__ == '__main__': rule = ctx.eval('rule') cms = CMS(rule) print(cms.title) - print(cms.homeContent()) + # print(cms.homeContent()) + # print(cms.categoryContent('latest',1)) + print(cms.detailContent(['https://hongkongdollvideo.com/video/b22c7cb6df40a3c4.html'])) # cms.categoryContent('dianying',1) # print(cms.detailContent(['67391'])) # print(cms.searchContent('斗罗大陆')) \ No newline at end of file diff --git a/readme.md b/readme.md index d428b8861c4d509672870f9a79fe5212b63c72de..8e0f2e18c17b20d5006fbcafbb63b5c08de1409e 100644 --- a/readme.md +++ b/readme.md @@ -10,4 +10,7 @@ ### 配置相关 本地地址 clan://localhost/pycms_local.json 推荐把文件放到 /storage/emulated/0/PlutoPlayer/pycms_local.json -并且pluto要自己切换路径为PlutoPlayer \ No newline at end of file +并且pluto要自己切换路径为PlutoPlayer + +### 相关教程 +[pyquery定位](https://blog.csdn.net/Arise007/article/details/79513094) \ No newline at end of file