提交 3adf261f 编写于 作者: H hjdhnx

新增了一堆东西

上级 46091025
...@@ -89,10 +89,14 @@ def forbidden(): # put application's code here ...@@ -89,10 +89,14 @@ def forbidden(): # put application's code here
@app.route('/index')
def index():
    """Render the index page with the three manager base URLs.

    Each manager URL keeps only the first two colon-separated parts of
    ``getHost(n)`` (i.e. everything before a second ``:``).  When the
    ``SUP_PORT`` config value is truthy it is appended as ``:<port>``;
    otherwise the URLs are passed to the template without a port.
    """
    sup_port = app.config.get('SUP_PORT', False)

    def _manager(host_index):
        # Keep "<part0>:<part1>" of the host string, dropping anything after
        # a second colon (presumably the port -- confirm against getHost).
        return ':'.join(getHost(host_index).split(':')[0:2])

    manager0 = _manager(0)
    manager1 = _manager(1)
    manager2 = _manager(2)
    # Downgrade only a leading "https" to "http"; a plain str.replace would
    # also rewrite any later "https" occurrence inside the host itself.
    if manager2.startswith('https'):
        manager2 = 'http' + manager2[len('https'):]

    if sup_port:
        port_suffix = f':{sup_port}'
        manager0 += port_suffix
        manager1 += port_suffix
        manager2 += port_suffix

    return render_template(
        'index.html',
        getHost=getHost,
        manager0=manager0,
        manager1=manager1,
        manager2=manager2,
        is_linux=is_linux(),
    )
...@@ -243,7 +247,12 @@ def vod(): ...@@ -243,7 +247,12 @@ def vod():
if play_url: # 播放 if play_url: # 播放
jxs = getJxs() jxs = getJxs()
play_url = cms.playContent(play_url,jxs) play_url = cms.playContent(play_url,jxs)
return redirect(play_url) if isinstance(play_url,str):
return redirect(play_url)
elif isinstance(play_url,dict):
return jsonify(play_url)
else:
return play_url
if ac and t: # 一级 if ac and t: # 一级
data = cms.categoryContent(t,pg) data = cms.categoryContent(t,pg)
...@@ -337,9 +346,20 @@ def getPics(path='images'): ...@@ -337,9 +346,20 @@ def getPics(path='images'):
return pic_list return pic_list
def getJxs(path='js'):
    """Load the parser ("解析") definitions from ``{path}/解析.conf``.

    Each useful line has the form ``name,url[,type]``.  Lines starting with
    ``#`` are comments.  Blank lines and lines with fewer than two
    comma-separated fields are skipped instead of raising ``IndexError``.

    Args:
        path: Directory that contains ``解析.conf``.

    Returns:
        A list of dicts with keys ``name``, ``url`` and ``type``.  ``type``
        is the third field as written in the file (a string), or the
        integer ``0`` when the line has no third field.
    """
    with open(f'{path}/解析.conf', encoding='utf-8') as f:
        data = f.read()

    jxs = []
    for raw_line in data.splitlines():
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        fields = [field.strip() for field in line.split(',')]
        if len(fields) < 2:
            # Malformed entry (no url part): ignore it rather than crash.
            continue
        jxs.append({
            'name': fields[0],
            'url': fields[1],
            'type': fields[2] if len(fields) > 2 else 0,
        })
    print(f'共计{len(jxs)}条解析')
    return jxs
......
...@@ -186,6 +186,10 @@ class CMS: ...@@ -186,6 +186,10 @@ class CMS:
pdfh = jsp.pdfh pdfh = jsp.pdfh
pdfa = jsp.pdfa pdfa = jsp.pdfa
pd = jsp.pd pd = jsp.pd
pjfh = jsp.pjfh
pjfa = jsp.pjfa
pj = jsp.pj
pq = jsp.pq pq = jsp.pq
return pdfh,pdfa,pd,pq return pdfh,pdfa,pd,pq
...@@ -409,9 +413,11 @@ class CMS: ...@@ -409,9 +413,11 @@ class CMS:
result = {} result = {}
videos = [] videos = []
jsp = jsoup(self.homeUrl) jsp = jsoup(self.homeUrl)
pdfh = jsp.pdfh is_json = str(p[0]).startswith('json:')
pdfa = jsp.pdfa pdfh = jsp.pjfh if is_json else jsp.pdfh
pd = jsp.pd pdfa = jsp.pjfa if is_json else jsp.pdfa
pd = jsp.pj if is_json else jsp.pd
print(html)
try: try:
if self.double: if self.double:
items = pdfa(html, p[0]) items = pdfa(html, p[0])
...@@ -422,7 +428,8 @@ class CMS: ...@@ -422,7 +428,8 @@ class CMS:
title = pdfh(item2, p[2]) title = pdfh(item2, p[2])
img = pd(item2, p[3]) img = pd(item2, p[3])
desc = pdfh(item2, p[4]) desc = pdfh(item2, p[4])
link = pd(item2, p[5]) links = [pd(item, p5) if not self.detailUrl else pdfh(item, p5) for p5 in p[5].split('+')]
link = '$'.join(links)
content = '' if len(p) < 7 else pdfh(item2, p[6]) content = '' if len(p) < 7 else pdfh(item2, p[6])
videos.append({ videos.append({
"vod_id": link, "vod_id": link,
...@@ -436,13 +443,16 @@ class CMS: ...@@ -436,13 +443,16 @@ class CMS:
except: except:
pass pass
else: else:
items = pdfa(html, p[0]) items = pdfa(html, p[0].replace('json:',''))
# print(items)
for item in items: for item in items:
try: try:
title = pdfh(item, p[1]) title = pdfh(item, p[1])
img = pd(item, p[2]) img = pd(item, p[2])
desc = pdfh(item, p[3]) desc = pdfh(item, p[3])
link = pd(item, p[4]) # link = pd(item, p[4])
links = [pd(item, p5) if not self.detailUrl else pdfh(item, p5) for p5 in p[4].split('+')]
link = '$'.join(links)
content = '' if len(p) < 6 else pdfh(item, p[5]) content = '' if len(p) < 6 else pdfh(item, p[5])
videos.append({ videos.append({
"vod_id": link, "vod_id": link,
...@@ -495,9 +505,10 @@ class CMS: ...@@ -495,9 +505,10 @@ class CMS:
return self.blank() return self.blank()
jsp = jsoup(self.url) jsp = jsoup(self.url)
pdfh = jsp.pdfh is_json = str(p[0]).startswith('json:')
pdfa = jsp.pdfa pdfh = jsp.pjfh if is_json else jsp.pdfh
pd = jsp.pd pdfa = jsp.pjfa if is_json else jsp.pdfa
pd = jsp.pj if is_json else jsp.pd
# print(pdfh(r.text,'body a.module-poster-item.module-item:eq(1)&&Text')) # print(pdfh(r.text,'body a.module-poster-item.module-item:eq(1)&&Text'))
# print(pdfh(r.text,'body a.module-poster-item.module-item:eq(0)')) # print(pdfh(r.text,'body a.module-poster-item.module-item:eq(0)'))
# print(pdfh(r.text,'body a.module-poster-item.module-item:first')) # print(pdfh(r.text,'body a.module-poster-item.module-item:first'))
...@@ -508,28 +519,32 @@ class CMS: ...@@ -508,28 +519,32 @@ class CMS:
r = requests.get(url, headers=self.headers, timeout=self.timeout) r = requests.get(url, headers=self.headers, timeout=self.timeout)
r.encoding = self.encoding r.encoding = self.encoding
print(r.url) print(r.url)
html = r.text # html = r.text
html = r.json() if is_json else r.text
# print(html) # print(html)
items = pdfa(html, p[0]) items = pdfa(html,p[0].replace('json:','',1))
except: except:
pass pass
# print(items)
for item in items: for item in items:
# print(item) # print(item)
try: try:
title = pdfh(item, p[1]) title = pdfh(item, p[1])
img = pd(item, p[2]) img = pd(item, p[2])
desc = pdfh(item, p[3]) desc = pdfh(item, p[3])
link = pd(item, p[4]) links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')]
link = '$'.join(links)
content = '' if len(p) < 6 else pdfh(item, p[5]) content = '' if len(p) < 6 else pdfh(item, p[5])
# sid = self.regStr(sid, "/video/(\\S+).html") # sid = self.regStr(sid, "/video/(\\S+).html")
videos.append({ videos.append({
"vod_id": link, "vod_id": f'{fyclass}${link}' if self.detailUrl else link,# 分类,播放链接
"vod_name": title, "vod_name": title,
"vod_pic": img, "vod_pic": img,
"vod_remarks": desc, "vod_remarks": desc,
"vod_content": content, "vod_content": content,
}) })
except: except Exception as e:
print(f'发生了错误:{e}')
pass pass
result['list'] = videos result['list'] = videos
result['page'] = fypage result['page'] = fypage
...@@ -540,11 +555,11 @@ class CMS: ...@@ -540,11 +555,11 @@ class CMS:
return result return result
def detailOneVod(self,id): def detailOneVod(self,id,fyclass=''):
detailUrl = str(id) detailUrl = str(id)
vod = {} vod = {}
if not detailUrl.startswith('http'): if not detailUrl.startswith('http'):
url = self.detailUrl.replace('fyid', detailUrl) url = self.detailUrl.replace('fyid', detailUrl).replace('fyclass',fyclass)
else: else:
url = detailUrl url = detailUrl
# print(url) # print(url)
...@@ -563,20 +578,24 @@ class CMS: ...@@ -563,20 +578,24 @@ class CMS:
return vod return vod
jsp = jsoup(self.url) jsp = jsoup(self.url)
pdfh = jsp.pdfh
pdfa = jsp.pdfa is_json = p.get('is_json',False) # 二级里加is_json参数
pd = jsp.pd pdfh = jsp.pjfh if is_json else jsp.pdfh
pdfa = jsp.pjfa if is_json else jsp.pdfa
pd = jsp.pj if is_json else jsp.pd
pq = jsp.pq pq = jsp.pq
obj = {} obj = {}
vod_name = '' vod_name = ''
r = requests.get(url, headers=self.headers, timeout=self.timeout) r = requests.get(url, headers=self.headers, timeout=self.timeout)
r.encoding = self.encoding r.encoding = self.encoding
html = r.text # html = r.text
html = r.json() if is_json else r.text
# print(html) # print(html)
if p.get('title'): if p.get('title'):
p1 = p['title'].split(';') p1 = p['title'].split(';')
vod_name = pdfh(html,p1[0]).replace('\n',' ') vod_name = pdfh(html,p1[0]).replace('\n',' ')
title = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1]) # title = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1])
title = '\n'.join([','.join([pdfh(html, pp1).strip() for pp1 in i.split('+')]) for i in p1])
# print(title) # print(title)
obj['title'] = title obj['title'] = title
if p.get('desc'): if p.get('desc'):
...@@ -610,10 +629,11 @@ class CMS: ...@@ -610,10 +629,11 @@ class CMS:
vod_play_from = '$$$' vod_play_from = '$$$'
playFrom = [] playFrom = []
if p.get('tabs'): if p.get('tabs'):
vodHeader = pdfa(html,p['tabs']) vodHeader = pdfa(html,p['tabs'].split(';')[0])
# print(f'线路列表数:{len((vodHeader))}') # print(f'线路列表数:{len((vodHeader))}')
# print(vodHeader) # print(vodHeader)
vodHeader = [pq(v).text() for v in vodHeader] if not is_json:
vodHeader = [pq(v).text() for v in vodHeader]
else: else:
vodHeader = ['道长在线'] vodHeader = ['道长在线']
...@@ -625,10 +645,20 @@ class CMS: ...@@ -625,10 +645,20 @@ class CMS:
vod_tab_list = [] vod_tab_list = []
if p.get('lists'): if p.get('lists'):
for i in range(len(vodHeader)): for i in range(len(vodHeader)):
p1 = p['lists'].replace('#id',str(i)) tab_name = str(vodHeader[i])
tab_ext = p['tabs'].split(';')[1] if len(p['tabs'].split(';')) > 1 else ''
p1 = p['lists'].replace('#idv',tab_name).replace('#id',str(i))
tab_ext = tab_ext.replace('#idv',tab_name).replace('#id',str(i))
vodList = pdfa(html,p1) # 1条线路的选集列表 vodList = pdfa(html,p1) # 1条线路的选集列表
# print(vodList)
# vodList = [pq(i).text()+'$'+pd(i,'a&&href') for i in vodList] # 拼接成 名称$链接 # vodList = [pq(i).text()+'$'+pd(i,'a&&href') for i in vodList] # 拼接成 名称$链接
vodList = [pq(i).text()+'$'+self.play_url+pd(i,'a&&href') for i in vodList] # 拼接成 名称$链接 if self.play_parse: # 自动base64编码
vodList = [(pdfh(html,tab_ext) if tab_ext else tab_name)+'$'+self.play_url+base64Encode(i) for i in vodList] if is_json else\
[pq(i).text()+'$'+self.play_url+base64Encode(pd(i,'a&&href')) for i in vodList] # 拼接成 名称$链接
else:
vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + self.play_url + i for i in
vodList] if is_json else \
[pq(i).text() + '$' + self.play_url + pd(i, 'a&&href') for i in vodList] # 拼接成 名称$链接
vlist = '#'.join(vodList) # 拼多个选集 vlist = '#'.join(vodList) # 拼多个选集
vod_tab_list.append(vlist) vod_tab_list.append(vlist)
vod_play_url = vod_play_url.join(vod_tab_list) vod_play_url = vod_play_url.join(vod_tab_list)
...@@ -652,7 +682,12 @@ class CMS: ...@@ -652,7 +682,12 @@ class CMS:
obj_list = [] obj_list = []
try: try:
for vod_url in array: for vod_url in array:
obj = thread_pool.submit(self.detailOneVod, vod_url) vod_class = ''
if vod_url.find('$') > -1:
tmp = vod_url.split('$')
vod_class = tmp[0]
vod_url = tmp[1]
obj = thread_pool.submit(self.detailOneVod, vod_url,vod_class)
obj_list.append(obj) obj_list.append(obj)
thread_pool.shutdown(wait=True) # 等待所有子线程并行完毕 thread_pool.shutdown(wait=True) # 等待所有子线程并行完毕
vod_list = [obj.result() for obj in obj_list] vod_list = [obj.result() for obj in obj_list]
...@@ -680,17 +715,19 @@ class CMS: ...@@ -680,17 +715,19 @@ class CMS:
if len(p) < 5: if len(p) < 5:
return self.blank() return self.blank()
jsp = jsoup(self.url) jsp = jsoup(self.url)
pdfh = jsp.pdfh is_json = str(p[0]).startswith('json:')
pdfa = jsp.pdfa pdfh = jsp.pjfh if is_json else jsp.pdfh
pd = jsp.pd pdfa = jsp.pjfa if is_json else jsp.pdfa
pd = jsp.pj if is_json else jsp.pd
pq = jsp.pq pq = jsp.pq
videos = [] videos = []
try: try:
r = requests.get(url, headers=self.headers,timeout=self.timeout) r = requests.get(url, headers=self.headers,timeout=self.timeout)
r.encoding = self.encoding r.encoding = self.encoding
html = r.text # html = r.text
html = r.json() if is_json else r.text
# print(html) # print(html)
if html.find('输入验证码') > -1: if not is_json and html.find('输入验证码') > -1:
cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api) cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api)
# cookie = '' # cookie = ''
if not cookie: if not cookie:
...@@ -703,7 +740,7 @@ class CMS: ...@@ -703,7 +740,7 @@ class CMS:
r.encoding = self.encoding r.encoding = self.encoding
html = r.text html = r.text
items = pdfa(html, p[0]) items = pdfa(html,p[0].replace('json:','',1))
# print(items) # print(items)
videos = [] videos = []
for item in items: for item in items:
...@@ -712,7 +749,9 @@ class CMS: ...@@ -712,7 +749,9 @@ class CMS:
title = pdfh(item, p[1]) title = pdfh(item, p[1])
img = pd(item, p[2]) img = pd(item, p[2])
desc = pdfh(item, p[3]) desc = pdfh(item, p[3])
link = pd(item, p[4]) # link = '$'.join([pd(item, p4) for p4 in p[4].split('+')])
links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')]
link = '$'.join(links)
content = '' if len(p) < 6 else pdfh(item, p[5]) content = '' if len(p) < 6 else pdfh(item, p[5])
# sid = self.regStr(sid, "/video/(\\S+).html") # sid = self.regStr(sid, "/video/(\\S+).html")
videos.append({ videos.append({
...@@ -736,6 +775,10 @@ class CMS: ...@@ -736,6 +775,10 @@ class CMS:
# logger.info('播放免嗅地址: ' + self.play_url) # logger.info('播放免嗅地址: ' + self.play_url)
if not jxs: if not jxs:
jxs = [] jxs = []
try:
play_url = baseDecode(play_url) # 自动base64解码
except:
pass
if self.lazy: if self.lazy:
print(f'{play_url}->开始执行免嗅代码{type(self.lazy)}->{self.lazy}') print(f'{play_url}->开始执行免嗅代码{type(self.lazy)}->{self.lazy}')
t1 = time() t1 = time()
...@@ -777,6 +820,10 @@ class CMS: ...@@ -777,6 +820,10 @@ class CMS:
loader,_ = runJScode(jscode,ctx=ctx) loader,_ = runJScode(jscode,ctx=ctx)
# print(loader.toString()) # print(loader.toString())
play_url = loader.eval('input') play_url = loader.eval('input')
if isinstance(play_url,JsObjectWrapper):
play_url = play_url.to_dict()
# print(type(play_url))
# print(play_url)
logger.info(f'js免嗅耗时:{get_interval(t1)}毫秒,播放地址:{play_url}') logger.info(f'js免嗅耗时:{get_interval(t1)}毫秒,播放地址:{play_url}')
except Exception as e: except Exception as e:
logger.info(f'免嗅耗时:{get_interval(t1)}毫秒,并发生错误:{e}') logger.info(f'免嗅耗时:{get_interval(t1)}毫秒,并发生错误:{e}')
...@@ -786,12 +833,15 @@ class CMS: ...@@ -786,12 +833,15 @@ class CMS:
return play_url return play_url
if __name__ == '__main__': if __name__ == '__main__':
print(urljoin('https://api.web.360kan.com/v1/f',
'//0img.hitv.com/preview/sp_images/2022/01/28/202201281528074643023.jpg'))
# exit()
from utils import parser from utils import parser
# js_path = f'js/玩偶姐姐.js' # js_path = f'js/玩偶姐姐.js'
# js_path = f'js/555影视.js' # js_path = f'js/555影视.js'
with open('../js/模板.js', encoding='utf-8') as f: with open('../js/模板.js', encoding='utf-8') as f:
before = f.read() before = f.read()
js_path = f'js/vip影院.js' js_path = f'js/360影视.js'
ctx, js_code = parser.runJs(js_path,before=before) ctx, js_code = parser.runJs(js_path,before=before)
ruleDict = ctx.rule.to_dict() ruleDict = ctx.rule.to_dict()
# lazy = ctx.eval('lazy') # lazy = ctx.eval('lazy')
...@@ -807,4 +857,5 @@ if __name__ == '__main__': ...@@ -807,4 +857,5 @@ if __name__ == '__main__':
# print(cms.detailContent(1,['https://cokemv.me/voddetail/40573.html'])) # print(cms.detailContent(1,['https://cokemv.me/voddetail/40573.html']))
# cms.categoryContent('dianying',1) # cms.categoryContent('dianying',1)
# print(cms.detailContent(['67391'])) # print(cms.detailContent(['67391']))
print(cms.searchContent('斗罗大陆')) # print(cms.searchContent('斗罗大陆'))
\ No newline at end of file print(cms.searchContent('独行月球'))
\ No newline at end of file
// Site rule for 360影视, consumed by the Python CMS class.
var rule = {
title:'360影视',
host:'https://www.360kan.com',
homeUrl:'https://api.web.360kan.com/v1/rank?cat=2&size=9',
// The CMS detail loader substitutes `fyclass` and `fyid` in this URL.
detailUrl:'https://api.web.360kan.com/v1/detail?cat=fyclass&id=fyid',
// NOTE(review): `**` and `fypage` look like keyword / page placeholders -- confirm against the CMS code.
searchUrl:'https://api.so.360kan.com/index?force_v=1&kw=**&from=&pageno=fypage&v_ap=1&tab=all',
url:'https://api.web.360kan.com/v1/filter/list?catid=fyclass&rank=rankhot&cat=&year=&area=&act=&size=35&pageno=fypage&callback=',
headers:{
'User-Agent':'MOBILE_UA'
},
timeout:5000,
// NOTE(review): presumably parallel '&'-separated lists of category names and their ids -- confirm.
class_name:'电视剧&电影&综艺&动漫',
class_url:'2&1&3&4',
limit:5,
// NOTE(review): presumably maps to the CMS `play_parse` flag, which base64-encodes episode links -- confirm.
play_parse:true,
// play_parse:true,
lazy:'js:input={parse: 1, playUrl: "", jx: 1, url: input}',
// A leading `json:` makes the CMS use its JSONPath helpers instead of the HTML ones.
// Fields are used in order: list path, title, image, remarks, link, content.
// A `+` inside the link field yields several values that the CMS joins with `$`.
推荐:'json:data;title;cover;comment;cat+ent_id;description',
一级:'json:data.movies;title;cover;pubdate;id;description',
// Detail rule: `is_json` selects the JSONPath helpers. In `tabs`, the part before `;` is the
// tab list path and the part after it is an optional per-tab expression. `#idv` is replaced
// with the tab name and `#id` with the tab index.
二级:{is_json:1,"title":"data.title;data.moviecategory[0]+data.moviecategory[1]","img":"data.cdncover","desc":"data.area[0];data.director[0]","content":"data.description","tabs":"data.playlink_sites;data.playlinksdetail.#idv.quality","lists":"data.playlinksdetail.#idv.default_url"},
// 二级:{is_json:1,"title":"data.title;data.moviecategory[0]+data.moviecategory[1]","img":"data.cdncover","desc":"data.area[0];data.director[0]","content":"data.description","tabs":"data.playlink_sites","lists":"data.playlinksdetail.#idv.default_url"},
搜索:'json:data.longData.rows;titleTxt;cover;score;cat_id+id;description',
}
\ No newline at end of file
# 0123,对应,普通解析,json解析,并发解析,聚合解析,参数3不填默认0
BT5V,https://rx.bt5v.com/json/jsonindex.php/?url=,1
爱酷,https://cache.json.icu/home/api?type=ys&uid=292796&key=fnoryABDEFJNPQV269&url=,1
# m3u8tv,https://jx.m3u8.tv/jiexi/?url=
# 思古解析,https://jsap.attakids.com/?url=
# 云解析,https://jx.ppflv.com/?url=
# 云解析2,https://jx.aidouer.net/?url=
# BL解析,https://vip.bljiex.cc/?v=
# 虾米解析,https://jx.xmflv.com/?url=
# 飞飞智能,https://y.9dan.cc/?v=
# 左岸解析,https://jx.bozrc.com:4433/player/?url=
# ok解析,https://okjx.cc/?url=
# 8090解析,https://www.8090g.cn/?url=
# ckplayer无广,https://www.ckplayer.vip/jiexi/?url=
# 盘古无广,http://www.pangujiexi.cc/jiexi.php?url=
# 江湖,http://jx.vipmv.co/?url=
\ No newline at end of file
m3u8tv,https://jx.m3u8.tv/jiexi/?url=
思古解析,https://jsap.attakids.com/?url=
云解析,https://jx.ppflv.com/?url=
云解析2,https://jx.aidouer.net/?url=
BL解析,https://vip.bljiex.cc/?v=
虾米解析,https://jx.xmflv.com/?url=
飞飞智能,https://y.9dan.cc/?v=
左岸解析,https://jx.bozrc.com:4433/player/?url=
ok解析,https://okjx.cc/?url=
8090解析,https://www.8090g.cn/?url=
ckplayer无广,https://www.ckplayer.vip/jiexi/?url=
盘古无广,http://www.pangujiexi.cc/jiexi.php?url=
江湖,http://jx.vipmv.co/?url=
\ No newline at end of file
...@@ -9,4 +9,5 @@ gevent ; python_version < '3.9' ...@@ -9,4 +9,5 @@ gevent ; python_version < '3.9'
gunicorn ; python_version >= '3.6' gunicorn ; python_version >= '3.6'
supervisor ; sys_platform != 'win32' supervisor ; sys_platform != 'win32'
func_timeout func_timeout
easydict easydict
\ No newline at end of file jsonpath
\ No newline at end of file
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
{% if config.WALL_PAPER_ENABLE %}"wallpaper":"{{ host }}/pics",{% endif %} {% if config.WALL_PAPER_ENABLE %}"wallpaper":"{{ host }}/pics",{% endif %}
"dr_count": {{rules.list|length}}, "dr_count": {{rules.list|length}},
"mode": {{ mode }}, "mode": {{ mode }},
"homepage":"https://gitcode.net/qq_32394351/dr_py",
"sites": [{% for rule in rules.list %}{% if mode == 0 %} "sites": [{% for rule in rules.list %}{% if mode == 0 %}
{ {
"key":"dr_{{ rule.name }}", "key":"dr_{{ rule.name }}",
...@@ -38,9 +39,9 @@ ...@@ -38,9 +39,9 @@
{ {
"name": "{{ jx.name }}", "name": "{{ jx.name }}",
"url": "{{ jx.url }}", "url": "{{ jx.url }}",
"type": 1, "type": {{ jx.type }},
"ext": { "ext": {
"flag": ["qiyi", "爱奇艺", "奇艺", "qq", "腾讯", "youku", "优酷", "pptv", "PPTV", "letv", "乐视", "bilibili", "哔哩哔哩", "哔哩", "mgtv", "芒果","sohu", "xigua"], "flag": ["qiyi","imgo","爱奇艺", "奇艺", "qq", "腾讯", "youku", "优酷", "pptv", "PPTV", "letv", "乐视", "bilibili", "哔哩哔哩", "哔哩", "mgtv", "芒果","sohu", "xigua"],
"header": { "header": {
"User-Agent": "Dart/2.14 (dart:io)" "User-Agent": "Dart/2.14 (dart:io)"
} }
......
...@@ -30,3 +30,5 @@ https://cuiqingcai.com/202232.html ...@@ -30,3 +30,5 @@ https://cuiqingcai.com/202232.html
"lives":[{"group":"redirect","channels":[{"name":"直播","urls":["proxy://do=live&type=txt&ext={% if config.LIVE_MODE==0 %}{{base64Encode(host+'/lives')}}{% else %}{{ base64Encode('https://gitcode.net/qq_26898231/TVBox/-/raw/main/live/0830zb.txt')}}{% endif %}"]}]}], "lives":[{"group":"redirect","channels":[{"name":"直播","urls":["proxy://do=live&type=txt&ext={% if config.LIVE_MODE==0 %}{{base64Encode(host+'/lives')}}{% else %}{{ base64Encode('https://gitcode.net/qq_26898231/TVBox/-/raw/main/live/0830zb.txt')}}{% endif %}"]}]}],
"homepage":"https://gitcode.net/qq_32394351/dr_py",
"imgo",
\ No newline at end of file
...@@ -3,21 +3,23 @@ ...@@ -3,21 +3,23 @@
# File : htmlParser.py # File : htmlParser.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/25 # Date : 2022/8/25
import json
from pyquery import PyQuery as pq from pyquery import PyQuery as pq
from urllib.parse import urljoin from urllib.parse import urljoin
import re import re
from jsonpath import jsonpath
class jsoup: class jsoup:
def __init__(self,MY_URL=''): def __init__(self,MY_URL=''):
self.MY_URL = MY_URL self.MY_URL = MY_URL
def test(self, text, string): def test(self, text:str, string:str):
searchObj = re.search(rf'{text}', string, re.M | re.I) searchObj = re.search(rf'{text}', string, re.M | re.I)
test_ret = True if searchObj else False test_ret = True if searchObj else False
return test_ret return test_ret
def pdfh(self,html,parse,pd=False): def pdfh(self,html,parse:str,add_url=False):
if not parse: if not parse:
return '' return ''
doc = pq(html) doc = pq(html)
...@@ -40,7 +42,7 @@ class jsoup: ...@@ -40,7 +42,7 @@ class jsoup:
ret = ret.html() ret = ret.html()
else: else:
ret = ret.attr(option) ret = ret.attr(option)
if pd and option in ['url','src','href','data-original','data-src']: if add_url and option in ['url','src','href','data-original','data-src']:
ret = urljoin(self.MY_URL,ret) ret = urljoin(self.MY_URL,ret)
else: else:
# ret = doc(parse+':first') # ret = doc(parse+':first')
...@@ -52,7 +54,7 @@ class jsoup: ...@@ -52,7 +54,7 @@ class jsoup:
ret = str(ret) ret = str(ret)
return ret return ret
def pdfa(self,html,parse): def pdfa(self,html,parse:str):
if not parse: if not parse:
return [] return []
if parse.find('&&') > -1: if parse.find('&&') > -1:
...@@ -64,12 +66,58 @@ class jsoup: ...@@ -64,12 +66,58 @@ class jsoup:
# return [item.html() for item in doc(parse).items()] # return [item.html() for item in doc(parse).items()]
return [str(item) for item in doc(parse).items()] return [str(item) for item in doc(parse).items()]
def pd(self,html,parse): def pd(self,html,parse:str):
return self.pdfh(html,parse,True) return self.pdfh(html,parse,True)
def pq(self,html): def pq(self,html:str):
return pq(html) return pq(html)
def pjfh(self, html, parse: str, add_url=False):
    """Return the first JSONPath match in ``html`` as a string.

    Args:
        html: Already-decoded JSON data, or a JSON document as ``str``.
        parse: JSONPath expression; ``$.`` is prepended when missing.
        add_url: When true, the result is joined onto ``self.MY_URL``
            with ``urljoin``.

    Returns:
        ``''`` when ``parse`` is empty or ``html`` is a string that is not
        valid JSON.  Otherwise the first match converted with ``str``, or
        ``''`` when the lookup result or its first element is falsy
        (passed through ``urljoin`` when ``add_url`` is set).
    """
    if not parse:
        return ''
    if isinstance(html, str):
        try:
            html = json.loads(html)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass; a bare except
            # would also swallow KeyboardInterrupt / SystemExit.
            print('字符串转json失败')
            return ''
    if not parse.startswith('$.'):
        parse = f'$.{parse}'
    ret = jsonpath(html, parse)
    if isinstance(ret, list):
        # Guard the index so an empty result list cannot raise IndexError.
        ret = str(ret[0]) if ret and ret[0] else ''
    else:
        # Non-list result (e.g. a falsy "no match" value) collapses to ''.
        ret = str(ret) if ret else ''
    if add_url:
        ret = urljoin(self.MY_URL, ret)
    return ret
def pj(self, html, parse: str):
    """Look up ``parse`` via ``pjfh`` with URL joining switched on."""
    joined = self.pjfh(html, parse, True)
    return joined
def pjfa(self, html, parse: str):
    """Return the list of JSONPath matches for ``parse`` in ``html``.

    Args:
        html: Already-decoded JSON data, or a JSON document as ``str``.
        parse: JSONPath expression; ``$.`` is prepended when missing.

    Returns:
        A list.  It is empty when ``parse`` is empty, when ``html`` is a
        string that is not valid JSON (previously ``''`` was returned
        here, unlike every other path), or when the lookup result is
        falsy.  A result consisting of exactly one nested list is
        unwrapped to that inner list.
    """
    if not parse:
        return []
    if isinstance(html, str):
        try:
            html = json.loads(html)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            return []
    if not parse.startswith('$.'):
        parse = f'$.{parse}'
    ret = jsonpath(html, parse)
    # Check the length before indexing so an empty list cannot raise.
    if isinstance(ret, list) and len(ret) == 1 and isinstance(ret[0], list):
        ret = ret[0]  # auto-unwrap a single nested list
    return ret or []
if __name__ == '__main__': if __name__ == '__main__':
import requests import requests
from parsel import Selector from parsel import Selector
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册