提交 e6fae6eb 编写于 作者: H hjdhnx

优化360搜索,搜索支持js:

上级 df213a9f
......@@ -818,67 +818,99 @@ class CMS:
logger.info(f'{self.getName()}搜索链接:{url}')
if not self.搜索:
return self.blank()
p = self.一级.split(';') if self.搜索 == '*' and self.一级 else self.搜索.split(';') # 解析
if len(p) < 5:
return self.blank()
# p = self.一级.split(';') if self.搜索 == '*' and self.一级 else self.搜索.split(';') # 解析
p = self.一级 if self.搜索 == '*' and self.一级 else self.搜索
jsp = jsoup(self.url)
is_json = str(p[0]).startswith('json:')
pdfh = jsp.pjfh if is_json else jsp.pdfh
pdfa = jsp.pjfa if is_json else jsp.pdfa
pd = jsp.pj if is_json else jsp.pd
pq = jsp.pq
videos = []
try:
r = requests.get(url, headers=self.headers,timeout=self.timeout)
html = self.checkHtml(r)
if is_json:
html = json.loads(html)
# print(html)
if not is_json and html.find('输入验证码') > -1:
cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api)
# cookie = ''
if not cookie:
return {
'list': videos
}
self.saveCookie(cookie)
self.headers['cookie'] = cookie
r = requests.get(url, headers=self.headers, timeout=self.timeout)
r.encoding = self.encoding
html = r.text
is_js = isinstance(p, str) and str(p).startswith('js:') # 是js
if is_js:
headers['Referer'] = getHome(url)
py_ctx.update({
'input': url,
'fetch_params': {'headers': headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
'd': self.d,
'KEY': key, # 搜索关键字
'detailUrl': self.detailUrl or '', # 详情页链接
'getParse': self.d.getParse,
'saveParse': self.d.saveParse,
'jsp': jsp, 'setDetail': setDetail,
})
ctx = py_ctx
# print(ctx)
jscode = getPreJs() + p.replace('js:', '', 1)
# print(jscode)
loader, _ = runJScode(jscode, ctx=ctx)
# print(loader.toString())
vods = loader.eval('VODS')
# print(vods)
if isinstance(vods, JsObjectWrapper):
videos = vods.to_list()
else:
p = p.split(';')
if len(p) < 5:
return self.blank()
is_json = str(p[0]).startswith('json:')
pdfh = jsp.pjfh if is_json else jsp.pdfh
pdfa = jsp.pjfa if is_json else jsp.pdfa
pd = jsp.pj if is_json else jsp.pd
pq = jsp.pq
try:
r = requests.get(url, headers=self.headers,timeout=self.timeout)
html = self.checkHtml(r)
if is_json:
html = json.loads(html)
# print(html)
if not is_json and html.find('输入验证码') > -1:
cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api)
# cookie = ''
if not cookie:
return {
'list': videos
}
self.saveCookie(cookie)
self.headers['cookie'] = cookie
r = requests.get(url, headers=self.headers, timeout=self.timeout)
r.encoding = self.encoding
html = r.text
items = pdfa(html,p[0].replace('json:','',1))
print(items)
videos = []
for item in items:
# print(item)
try:
title = pdfh(item, p[1])
try:
img = pd(item, p[2])
except:
img = ''
items = pdfa(html,p[0].replace('json:','',1))
# print(items)
videos = []
for item in items:
# print(item)
try:
desc = pdfh(item, p[3])
# title = pdfh(item, p[1])
title =''.join([pdfh(item, i) for i in p[1].split('||')])
try:
img = pd(item, p[2])
except:
img = ''
try:
desc = pdfh(item, p[3])
except:
desc = ''
try:
content = '' if len(p) < 6 else pdfh(item, p[5])
except:
content = ''
# link = '$'.join([pd(item, p4) for p4 in p[4].split('+')])
links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')]
link = '$'.join(links)
# print(content)
# sid = self.regStr(sid, "/video/(\\S+).html")
videos.append({
"vod_id": link,
"vod_name": title,
"vod_pic": img,
"vod_remarks": desc,
"vod_content": content, # 无用参数
})
except:
desc = ''
# link = '$'.join([pd(item, p4) for p4 in p[4].split('+')])
links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')]
link = '$'.join(links)
content = '' if len(p) < 6 else pdfh(item, p[5])
# sid = self.regStr(sid, "/video/(\\S+).html")
videos.append({
"vod_id": link,
"vod_name": title,
"vod_pic": img,
"vod_remarks": desc,
"vod_content": content, # 无用参数
})
except:
pass
# print(videos)
except Exception as e:
logger.info(f'搜索{self.getName()}发生错误:{e}')
pass
# print(videos)
except Exception as e:
logger.info(f'搜索{self.getName()}发生错误:{e}')
result = {
'list': videos
}
......
......@@ -4,6 +4,8 @@ var rule = {
homeUrl:'https://api.web.360kan.com/v1/rank?cat=2&size=9',
detailUrl:'https://api.web.360kan.com/v1/detail?cat=fyclass&id=fyid',
searchUrl:'https://api.so.360kan.com/index?force_v=1&kw=**&from=&pageno=fypage&v_ap=1&tab=all',
searchable:1,
quickSearch:1,
url:'https://api.web.360kan.com/v1/filter/list?catid=fyclass&rank=rankhot&cat=&year=&area=&act=&size=35&pageno=fypage&callback=',
headers:{
'User-Agent':'MOBILE_UA'
......@@ -23,5 +25,6 @@ var rule = {
// 二级:{is_json:1,"title":"data.title;data.moviecategory[0]+data.moviecategory[1]","img":"data.cdncover","desc":"data.area[0];data.director[0]","content":"data.description","tabs":"data.playlink_sites;data.playlinksdetail.#idv.quality","lists":"data.playlinksdetail.#idv.default_url"},
// 二级:{is_json:1,"title":"data.title;data.moviecategory[0]+data.moviecategory[1]","img":"data.cdncover","desc":"data.area[0];data.director[0]","content":"data.description","tabs":"data.playlink_sites","lists":"data.playlinksdetail.#idv.default_url"},
二级:'js:let html=JSON.parse(fetch(input,fetch_params));let data=html.data;let tilte=data.title;let img=data.cdncover;let vod_type=data.moviecategory.join(",");let area=data.area.join(",");let director=data.director.join(",");let actor=data.actor.join(",");let content=data.description;base_vod={vod_id:input,vod_name:tilte,type_name:vod_type,vod_actor:actor,vod_director:director,vod_content:content,vod_remarks:area,vod_pic:urljoin2(input,img)};let delta=200;let vod_play={};let sites=data.playlink_sites;for(let i in sites){let site=sites[i];let playList="";let vodItems=[];if(data.allupinfo){let total=parseInt(data.allupinfo[site]);for(let j=1;j<total;j+=delta){let end=Math.min(total,j+delta-1);let url2=buildUrl(input,{start:j,end:end,site:site});let vod_data=JSON.parse(fetch(url2),fetch_params).data;if(vod_data.allepidetail){vod_data=vod_data.allepidetail[site];vod_data.forEach(function(item,index){vodItems.push((item.playlink_num||"")+"$"+(item.url||""))})}else{vod_data=vod_data.defaultepisode;vod_data.forEach(function(item,index){vodItems.push((item.period||"")+(item.name||"")+"$"+item.url||"")})}}}else{let item=data.playlinksdetail[site];vodItems.push((item.sort||"")+"$"+(item.default_url||""))}if(vodItems.length>0){playList=vodItems.join("#")}if(playList.length<1){continue}vod_play[site]=playList}let tabs=Object.keys(vod_play);let playUrls=[];for(let id in tabs){playUrls.push(vod_play[tabs[id]])}if(tabs.length>0){vod_play_from=tabs.join("$$$");vod_play_url=playUrls.join("$$$");base_vod.vod_play_from=vod_play_from;base_vod.vod_play_url=vod_play_url}vod=base_vod;',
搜索:'json:data.longData.rows;titleTxt;cover;score;cat_id+id;description',
// 搜索:'json:data.longData.rows;titleTxt;cover;cat_name;cat_id+en_id;description',
搜索:'json:data.longData.rows;titleTxt||titlealias;cover;cat_name;cat_id+en_id;description',
}
\ No newline at end of file
3.3.4
\ No newline at end of file
3.3.5
\ No newline at end of file
......@@ -4,7 +4,7 @@
<a href="https://alist.nn.ci"><img height="100px" alt="logo" src="https://gitcode.net/qq_32394351/dr_py/-/raw/1fe0e082b1ceacc4469d7f175a605cc2edf0bab0/static/img/icon.png"/></a>
<p><em>🗂️A web server that converts web and x5 movie sites into CMS API data</em></p>
<a href="https://gitcode.net/qq_32394351/dr_py/-/releases">
<img src="https://img.shields.io/badge/version-3.2.1-blue" alt="latest version" />
<img src="https://img.shields.io/badge/version-3.3.5-blue" alt="latest version" />
</a>
<a href="https://gitcode.net/qq_32394351/dr_py/-/issues">
<img src="https://img.shields.io/badge/discussions-2-orange" alt="discussions" />
......@@ -52,6 +52,7 @@
- [X] 3.升级到3.3.2,增加自定义本地文件路由: {{ host }}/files/文件名和{{ host }}/txt/文件名 比如 {{ host }}/files/custom_spider.jar
- [X] 4.升级到3.3.4,修改了默认爬虫jar方案,支持轮询和并发json解析,新增用户自定义解析配置
- [ ] 5.待开发搜索支持js写法(后续再考虑首页推荐支持json双模式+js)
- [X] 6.升级到3.3.5,搜索支持js写法,并修复了360影视搜索问题(搜索的标题定位支持用||分隔多个字段并合并结果,解决猫壳自动过滤搜索结果的问题,比如《月升沧海》改名后会被猫壳过滤)
###### 2022/09/07
- [X] 1.优化后台管理登录界面,升级更新脚本
- [X] 2.增加了镜像合并脚本(三合一直接拉 hjdhnx/drpy 即可)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册