diff --git a/base/rules.db b/base/rules.db index 75874850d7c542d04728639ca2b09aa29337c625..694e59bdb2e7939026e4882a34478f0b565247b1 100644 Binary files a/base/rules.db and b/base/rules.db differ diff --git a/controllers/cms.py b/controllers/cms.py index 72dc87a3e8fd908e9cda9b020ddc6382447601bf..4850cb7096a07a3f3ed8eddf29c5fd89fed150e6 100644 --- a/controllers/cms.py +++ b/controllers/cms.py @@ -191,6 +191,7 @@ class CMS: 'getParse':self.getParse, 'saveParse':self.saveParse, 'oheaders':self.oheaders, + 'headers':self.headers, # 通用免嗅需要 'encoding':self.encoding, 'name':self.title, 'timeout':self.timeout, @@ -763,9 +764,171 @@ class CMS: return result + def 二级渲染(self,parse_str:'str|dict',**kwargs): + # *args是不定长参数 列表 + # ** args是不定长参数字典 + p = parse_str # 二级传递解析表达式 js的obj json对象 + detailUrl = kwargs.get('detailUrl','') # 不定长字典传递的二级详情页vod_id原始数据 + url = kwargs.get('url','') # 不定长字典传递的二级详情页链接智能拼接数据 + vod = kwargs.get('vod',self.blank_vod()) # 最终要返回的二级详情页数据 默认空 + html = kwargs.get('html','') # 不定长字典传递的源码(如果不传才会在下面程序中去获取) + show_name = kwargs.get('show_name','') # 是否显示来源(用于drpy区分) + jsp = kwargs.get('jsp','') # jsp = jsoup(self.url) 传递的jsp解析 + fyclass = kwargs.get('fyclass','') # 二级传递的分类名称,可以得知进去的类别 + if p == '*': # 解析表达式为*默认一级直接变播放 + vod['vod_play_from'] = '道长在线' + vod['vod_remarks'] = detailUrl + vod['vod_actor'] = '没有二级,只有一级链接直接嗅探播放' + # vod['vod_content'] = url if not show_name else f'({self.id}) {url}' + vod['vod_content'] = url + vod['vod_play_url'] = '嗅探播放$' + self.play_url + url + + elif not p or (not isinstance(p, dict) and not isinstance(p, str)) or (isinstance(p, str) and not str(p).startswith('js:')): + pass + else: + is_json = p.get('is_json', False) if isinstance(p, dict) else False # 二级里加is_json参数 + pdfh = jsp.pjfh if is_json else jsp.pdfh + pdfa = jsp.pjfa if is_json else jsp.pdfa + pd = jsp.pj if is_json else jsp.pd + pq = jsp.pq + obj = {} + vod_name = '' + if not html: # 没传递html参数接下来智能获取 + r = requests.get(url, headers=self.headers, timeout=self.timeout) + html = self.checkHtml(r) + if is_json: + html = self.dealJson(html) + html = json.loads(html) + if p.get('title'): + p1 = p['title'].split(';') + vod_name = pdfh(html, p1[0]).replace('\n', ' ') + # title = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1]) + title = '\n'.join([','.join([pdfh(html, pp1).strip() for pp1 in i.split('+')]) for i in p1]) + # print(title) + obj['title'] = title + if p.get('desc'): + try: + p1 = p['desc'].split(';') + desc = '\n'.join([pdfh(html, i).replace('\n', ' ') for i in p1]) + obj['desc'] = desc + except: + pass + + if p.get('content'): + p1 = p['content'].split(';') + try: + content = '\n'.join([pdfh(html, i).replace('\n', ' ') for i in p1]) + obj['content'] = content + except: + pass + + if p.get('img'): + p1 = p['img'] + try: + img = pd(html, p1) + obj['img'] = img + except Exception as e: + logger.info(f'二级图片定位失败,但不影响使用{e}') + + vod = { + "vod_id": detailUrl, + "vod_name": vod_name, + "vod_pic": obj.get('img', ''), + "type_name": obj.get('title', ''), + "vod_year": "", + "vod_area": "", + "vod_remarks": obj.get('desc', ''), + "vod_actor": "", + "vod_director": "", + "vod_content": obj.get('content', '') + } + + vod_play_from = '$$$' + playFrom = [] + if p.get('重定向') and str(p['重定向']).startswith('js:'): + headers['Referer'] = getHome(url) + py_ctx.update({ + 'input': url, + 'html': html, + 'TYPE': 'detail', # 海阔js环境标志 + 'cateID': fyclass, # 当前分类 + 'oheaders': self.d.oheaders, + 'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding}, + 'd': self.d, + 'getParse': self.d.getParse, + 'saveParse': self.d.saveParse, + 'jsp': jsp, 'setDetail': setDetail, + }) + ctx = py_ctx + # print(ctx) + rcode = p['重定向'].replace('js:', '', 1) + jscode = getPreJs() + rcode + # print(jscode) + loader, _ = runJScode(jscode, ctx=ctx) + # print(loader.toString()) + logger.info(f'开始执行二级重定向代码:{rcode}') + html = loader.eval('html') + if isinstance(vod, JsObjectWrapper): + html = str(html) + + if p.get('tabs'): + vodHeader = [] + # print(p['tabs'].split(';')[0]) + vHeader = pdfa(html, p['tabs'].split(';')[0]) + # print(f'线路列表数:{len((vodHeader))}') + # print(vodHeader) + if not is_json: + for v in vHeader: + # 过滤排除掉线路标题 + v_title = pq(v).text() + if self.tab_exclude and jsp.test(self.tab_exclude, v_title): + continue + vodHeader.append(v_title) + else: + vodHeader = vHeader + else: + vodHeader = ['道长在线'] + + # print(vodHeader) + + for v in vodHeader: + playFrom.append(v) + vod_play_from = vod_play_from.join(playFrom) + + vod_play_url = '$$$' + vod_tab_list = [] + if p.get('lists'): + for i in range(len(vodHeader)): + tab_name = str(vodHeader[i]) + tab_ext = p['tabs'].split(';')[1] if len(p['tabs'].split(';')) > 1 else '' + p1 = p['lists'].replace('#idv', tab_name).replace('#id', str(i)) + tab_ext = tab_ext.replace('#idv', tab_name).replace('#id', str(i)) + vodList = pdfa(html, p1) # 1条线路的选集列表 + # print(vodList) + # vodList = [pq(i).text()+'$'+pd(i,'a&&href') for i in vodList] # 拼接成 名称$链接 + if self.play_parse: # 自动base64编码 + vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + self.play_url + encodeUrl(i) for i + in vodList] if is_json else \ + [pq(i).text() + '$' + self.play_url + encodeUrl(pd(i, 'a&&href')) for i in vodList] # 拼接成 名称$链接 + else: + vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + self.play_url + i for i in + vodList] if is_json else \ + [pq(i).text() + '$' + self.play_url + pd(i, 'a&&href') for i in vodList] # 拼接成 名称$链接 + vlist = '#'.join(vodList) # 拼多个选集 + vod_tab_list.append(vlist) + vod_play_url = vod_play_url.join(vod_tab_list) + # print(vod_play_url) + vod['vod_play_from'] = vod_play_from + # print(vod_play_from) + vod['vod_play_url'] = vod_play_url + + if show_name: + vod['vod_content'] = f'({self.id}){vod.get("vod_content", "")}' + return vod + def detailOneVod(self,id,fyclass='',show_name=False): + vod = self.blank_vod() detailUrl = str(id) - vod = {} if not detailUrl.startswith('http') and not '/' in detailUrl: url = self.detailUrl.replace('fyid', detailUrl).replace('fyclass',fyclass) elif '/' in detailUrl: @@ -775,32 +938,14 @@ class CMS: logger.info(f'进入详情页: {url}') try: p = self.二级 # 解析 - if p == '*': - vod = self.blank_vod() - vod['vod_play_from'] = '道长在线' - vod['vod_remarks'] = detailUrl - vod['vod_actor'] = '没有二级,只有一级链接直接嗅探播放' - vod['vod_content'] = url if not show_name else f'({self.id}) {url}' - vod['vod_play_url'] = '嗅探播放$'+self.play_url+url - print(vod) - return vod - - if not p: - return vod - if not isinstance(p,dict) and not isinstance(p,str): - return vod - if isinstance(p,str) and not str(p).startswith('js:'): - return vod - jsp = jsoup(self.url) - - is_json = p.get('is_json',False) if isinstance(p,dict) else False # 二级里加is_json参数 is_js = isinstance(p,str) and str(p).startswith('js:') # 是js if is_js: headers['Referer'] = getHome(url) py_ctx.update({ 'input': url, 'TYPE': 'detail', # 海阔js环境标志 + '二级': self.二级渲染, # 二级解析函数,可以解析dict 'cateID': fyclass, # 当前分类 'oheaders': self.d.oheaders, 'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding}, @@ -818,148 +963,15 @@ class CMS: vod = loader.eval('vod') if isinstance(vod,JsObjectWrapper): vod = vod.to_dict() + if show_name: + vod['vod_content'] = f'({self.id}){vod.get("vod_content", "")}' else: - vod = {} - # print(type(vod)) - # print(vod) + vod = self.blank_vod() else: - pdfh = jsp.pjfh if is_json else jsp.pdfh - pdfa = jsp.pjfa if is_json else jsp.pdfa - pd = jsp.pj if is_json else jsp.pd - pq = jsp.pq - obj = {} - vod_name = '' - r = requests.get(url, headers=self.headers, timeout=self.timeout) - html = self.checkHtml(r) - if is_json: - html = self.dealJson(html) - html = json.loads(html) - if p.get('title'): - p1 = p['title'].split(';') - vod_name = pdfh(html,p1[0]).replace('\n',' ') - # title = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1]) - title = '\n'.join([','.join([pdfh(html, pp1).strip() for pp1 in i.split('+')]) for i in p1]) - # print(title) - obj['title'] = title - if p.get('desc'): - try: - p1 = p['desc'].split(';') - desc = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1]) - obj['desc'] = desc - except: - pass - - if p.get('content'): - p1 = p['content'].split(';') - try: - content = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1]) - obj['content'] = content - except: - pass - - if p.get('img'): - p1 = p['img'] - try: - img = pd(html,p1) - obj['img'] = img - except Exception as e: - logger.info(f'二级图片定位失败,但不影响使用{e}') - - vod = { - "vod_id": detailUrl, - "vod_name": vod_name, - "vod_pic": obj.get('img',''), - "type_name": obj.get('title',''), - "vod_year": "", - "vod_area": "", - "vod_remarks": obj.get('desc',''), - "vod_actor": "", - "vod_director": "", - "vod_content": obj.get('content','') - } - - vod_play_from = '$$$' - playFrom = [] - if p.get('重定向') and str(p['重定向']).startswith('js:'): - headers['Referer'] = getHome(url) - py_ctx.update({ - 'input': url, - 'html': html, - 'TYPE': 'detail', # 海阔js环境标志 - 'cateID': fyclass, # 当前分类 - 'oheaders': self.d.oheaders, - 'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding}, - 'd': self.d, - 'getParse': self.d.getParse, - 'saveParse': self.d.saveParse, - 'jsp': jsp, 'setDetail': setDetail, - }) - ctx = py_ctx - # print(ctx) - rcode = p['重定向'].replace('js:', '', 1) - jscode = getPreJs() + rcode - # print(jscode) - loader, _ = runJScode(jscode, ctx=ctx) - # print(loader.toString()) - logger.info(f'开始执行二级重定向代码:{rcode}') - html = loader.eval('html') - if isinstance(vod, JsObjectWrapper): - html = str(html) - - if p.get('tabs'): - vodHeader = [] - # print(p['tabs'].split(';')[0]) - vHeader = pdfa(html,p['tabs'].split(';')[0]) - # print(f'线路列表数:{len((vodHeader))}') - # print(vodHeader) - if not is_json: - for v in vHeader: - # 过滤排除掉线路标题 - v_title = pq(v).text() - if self.tab_exclude and jsp.test(self.tab_exclude, v_title): - continue - vodHeader.append(v_title) - else: - vodHeader = vHeader - else: - vodHeader = ['道长在线'] - - # print(vodHeader) - - for v in vodHeader: - playFrom.append(v) - vod_play_from = vod_play_from.join(playFrom) - - vod_play_url = '$$$' - vod_tab_list = [] - if p.get('lists'): - for i in range(len(vodHeader)): - tab_name = str(vodHeader[i]) - tab_ext = p['tabs'].split(';')[1] if len(p['tabs'].split(';')) > 1 else '' - p1 = p['lists'].replace('#idv',tab_name).replace('#id',str(i)) - tab_ext = tab_ext.replace('#idv',tab_name).replace('#id',str(i)) - vodList = pdfa(html,p1) # 1条线路的选集列表 - # print(vodList) - # vodList = [pq(i).text()+'$'+pd(i,'a&&href') for i in vodList] # 拼接成 名称$链接 - if self.play_parse: # 自动base64编码 - vodList = [(pdfh(html,tab_ext) if tab_ext else tab_name)+'$'+self.play_url+encodeUrl(i) for i in vodList] if is_json else\ - [pq(i).text()+'$'+self.play_url+encodeUrl(pd(i,'a&&href')) for i in vodList] # 拼接成 名称$链接 - else: - vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + self.play_url + i for i in - vodList] if is_json else \ - [pq(i).text() + '$' + self.play_url + pd(i, 'a&&href') for i in vodList] # 拼接成 名称$链接 - vlist = '#'.join(vodList) # 拼多个选集 - vod_tab_list.append(vlist) - vod_play_url = vod_play_url.join(vod_tab_list) - # print(vod_play_url) - vod['vod_play_from'] = vod_play_from - # print(vod_play_from) - vod['vod_play_url'] = vod_play_url - # print(vod_play_url) + vod = self.二级渲染(p,detailUrl=detailUrl,url=url,vod=vod,show_name=show_name,jsp=jsp,fyclass=fyclass) except Exception as e: logger.info(f'{self.getName()}获取单个详情页{detailUrl}出错{e}') - if show_name: - vod['vod_content'] = f'({self.id}){vod.get("vod_content","")}' + return vod def detailContent(self, fypage, array,show_name=False): diff --git "a/js/\346\250\241\346\235\277.js" "b/js/\346\250\241\346\235\277.js" index 3c1925adc306c3ff5bcdca3bb285ed0360f5fe72..af2945faf38099a483066467e576725613d5db87 100644 --- "a/js/\346\250\241\346\235\277.js" +++ "b/js/\346\250\241\346\235\277.js" @@ -68,7 +68,8 @@ var mubanDict = { // 模板字典 'User-Agent':'UC_UA', // "Cookie": "" }, - class_parse:'.stui-header__menu li:gt(0):lt(7);a&&Text;a&&href;/(\\d+).html', + // class_parse:'.stui-header__menu li:gt(0):lt(7);a&&Text;a&&href;/(\\d+).html', + class_parse:'.stui-header__menu li:gt(0):lt(7);a&&Text;a&&href;.*/(.*?).html', play_parse:true, lazy:'', limit:6, @@ -76,7 +77,9 @@ var mubanDict = { // 模板字典 double:true, // 推荐内容是否双层定位 一级:'.stui-vodlist li;a&&title;a&&data-original;.pic-text&&Text;a&&href', 二级:{"title":".stui-content__detail .title&&Text;.stui-content__detail p:eq(-2)&&Text","img":".stui-content__thumb .lazyload&&data-original","desc":".stui-content__detail p:eq(0)&&Text;.stui-content__detail p:eq(1)&&Text;.stui-content__detail p:eq(2)&&Text","content":".detail&&Text","tabs":".stui-vodlist__head h3","lists":".stui-content__playlist:eq(#id) li"}, - 搜索:'#searchList li;a&&title;.lazyload&&data-original;.text-muted&&Text;a&&href;.text-muted:eq(-1)&&Text', + // 搜索:'#searchList li;a&&title;.lazyload&&data-original;.text-muted&&Text;a&&href;.text-muted:eq(-1)&&Text', + 搜索:'ul.stui-vodlist&&li;a&&title;.lazyload&&data-original;.text-muted&&Text;a&&href;.text-muted:eq(-1)&&Text', + // 搜索:'ul.stui-vodlist__media&&li;a&&title;.lazyload&&data-original;.text-muted&&Text;a&&href;.text-muted:eq(-1)&&Text', }, vfed:{ title:'', diff --git "a/js/\347\216\204\345\244\251.js" "b/js/\347\216\204\345\244\251.js" new file mode 100644 index 0000000000000000000000000000000000000000..22808cd455a2b6c5b2abba88e85609e1be281d95 --- /dev/null +++ "b/js/\347\216\204\345\244\251.js" @@ -0,0 +1,8 @@ +muban.首图2.二级.tabs = '.dropdown-menu li'; +var rule = Object.assign(muban.首图2,{ + title:'玄天', + host:'https://m.7caa.com', + url:'/list/fyclass-fypage.html', + searchUrl:'/search/**----------fypage---.html', + lazy:'通用免嗅' +}); \ No newline at end of file diff --git "a/txt/js/tg/\347\232\256\347\232\256\346\263\241.js" "b/txt/js/tg/\347\232\256\347\232\256\346\263\241.js" index 359c54b4f2ec7e338398ac26aa7b90e4f5b54b00..f393ea5fb46564ecea06e613132df93efefa1a2c 100644 --- "a/txt/js/tg/\347\232\256\347\232\256\346\263\241.js" +++ "b/txt/js/tg/\347\232\256\347\232\256\346\263\241.js" @@ -7,4 +7,5 @@ searchUrl:'/vodsearch**/page/fypage.html', class_name:'电视剧&电影&综艺&动漫&纪录片',//静态分类名称拼接 class_url:'dianshiju&dianying&zongyi&dongman&jilupian',//静态分类标识拼接 class_parse:' ', +搜索:'ul.stui-vodlist__media&&li;a&&title;.lazyload&&data-original;.text-muted&&Text;a&&href;.text-muted:eq(-1)&&Text', }); \ No newline at end of file