提交 05c8832d 编写于 作者: H hjdhnx

增加了兔小贝儿歌

上级 61407aff
......@@ -26,7 +26,7 @@ py_ctx = {
'requests':requests,'print':print,'base64Encode':base64Encode,'baseDecode':baseDecode,
'log':logger.info,'fetch':fetch,'post':post,'request':request,'getCryptoJS':getCryptoJS,
'buildUrl':buildUrl,'getHome':getHome,'setDetail':setDetail,'join':join,'urljoin2':urljoin2,
'PC_UA':PC_UA,'MOBILE_UA':MOBILE_UA,'UC_UA':UC_UA
'PC_UA':PC_UA,'MOBILE_UA':MOBILE_UA,'UC_UA':UC_UA,'IOS_UA':IOS_UA
}
# print(getCryptoJS())
......@@ -104,6 +104,8 @@ class CMS:
headers[k] = PC_UA
elif v == 'UC_UA':
headers[k] = UC_UA
elif v == 'IOS_UA':
headers[k] = IOS_UA
lower_keys = list(map(lambda x:x.lower(),keys))
if not 'user-agent' in lower_keys:
headers['User-Agent'] = UA
......@@ -302,6 +304,14 @@ class CMS:
else:
return ''
def dealJson(self, html):
    """Strip any wrapper text around a JSON object and return the object text.

    Responses such as JSONP (``callback({...})``) carry extra characters
    before the first ``{`` and after the last ``}``. This returns the
    substring running from the first ``{`` through the last ``}`` so the
    result can be handed to ``json.loads``.

    Unlike a line-bound regex, slicing also works when the object spans
    several lines, so a nested single-line object is never mistaken for the
    whole payload.

    Args:
        html: Response text that may contain a wrapped JSON object.

    Returns:
        The ``{...}`` substring, or ``html`` unchanged when it is not a
        string or contains no ``{`` followed by a ``}``.
    """
    # Non-string input (None, bytes, already-parsed data) is passed through
    # untouched, matching the previous best-effort behaviour.
    if not isinstance(html, str):
        return html
    start = html.find('{')
    end = html.rfind('}')
    # No opening brace, or no closing brace after it: nothing to extract.
    if start == -1 or end < start:
        return html
    return html[start:end + 1]
def checkHtml(self,r):
r.encoding = self.encoding
html = r.text
......@@ -474,6 +484,8 @@ class CMS:
return self.blank()
jsp = jsoup(self.homeUrl)
is_json = str(p[0]).startswith('json:')
if is_json:
html = self.dealJson(html)
pdfh = jsp.pjfh if is_json else jsp.pdfh
pdfa = jsp.pjfa if is_json else jsp.pdfa
pd = jsp.pj if is_json else jsp.pd
......@@ -481,12 +493,17 @@ class CMS:
try:
if self.double:
items = pdfa(html, p[0])
# print(items)
for item in items:
items2 = pdfa(item,p[1])
# print(items2)
for item2 in items2:
try:
title = pdfh(item2, p[2])
img = pd(item2, p[3])
try:
img = pd(item2, p[3])
except:
img = ''
desc = pdfh(item2, p[4])
links = [pd(item2, p5) if not self.detailUrl else pdfh(item2, p5) for p5 in p[5].split('+')]
link = '$'.join(links)
......@@ -617,6 +634,7 @@ class CMS:
r = requests.get(url, headers=self.headers, timeout=self.timeout)
html = self.checkHtml(r)
if is_json:
html = self.dealJson(html)
html = json.loads(html)
# print(html)
items = pdfa(html,p[0].replace('json:','',1))
......@@ -659,8 +677,10 @@ class CMS:
def detailOneVod(self,id,fyclass=''):
detailUrl = str(id)
vod = {}
if not detailUrl.startswith('http'):
if not detailUrl.startswith('http') and not '/' in detailUrl:
url = self.detailUrl.replace('fyid', detailUrl).replace('fyclass',fyclass)
elif '/' in detailUrl:
url = urljoin(self.homeUrl,detailUrl)
else:
url = detailUrl
print(url)
......@@ -671,8 +691,8 @@ class CMS:
vod['vod_play_from'] = '道长在线'
vod['vod_remarks'] = detailUrl
vod['vod_actor'] = '没有二级,只有一级链接直接嗅探播放'
vod['vod_content'] = detailUrl
vod['vod_play_url'] = '嗅探播放$'+self.play_url+detailUrl
vod['vod_content'] = url
vod['vod_play_url'] = '嗅探播放$'+self.play_url+url
print(vod)
return vod
......@@ -718,6 +738,7 @@ class CMS:
r = requests.get(url, headers=self.headers, timeout=self.timeout)
html = self.checkHtml(r)
if is_json:
html = self.dealJson(html)
html = json.loads(html)
if p.get('title'):
p1 = p['title'].split(';')
......@@ -867,7 +888,7 @@ class CMS:
'list': []
}
logger.info(f'{self.getName()}获取详情页耗时:{get_interval(t1)}毫秒,发生错误:{e}')
print(result)
# print(result)
return result
def searchContent(self, key, fypage=1):
......@@ -919,6 +940,7 @@ class CMS:
r = requests.get(url, headers=self.headers,timeout=self.timeout)
html = self.checkHtml(r)
if is_json:
html = self.dealJson(html)
html = json.loads(html)
# print(html)
if not is_json and html.find('输入验证码') > -1:
......@@ -1008,6 +1030,7 @@ class CMS:
play_url = lazy_url
else:
jscode = str(self.lazy).split('js:')[1]
jsp = jsoup(self.url)
# jscode = f'var input={play_url};{jscode}'
# print(jscode)
headers['Referer'] = getHome(play_url)
......@@ -1018,6 +1041,7 @@ class CMS:
'jxs':jxs,
'getParse':self.d.getParse,
'saveParse':self.d.saveParse,
'jsp': jsp,
'pdfh': self.d.jsp.pdfh,
'pdfa': self.d.jsp.pdfa, 'pd': self.d.jsp.pd,
})
......
var rule = Object.assign(muban.首图2,{
title:'LIBVIO',
host:'https://www.libvio.me',
// host:'https://www.libvio.me',
host:'https://www.libvio.fun',
url:'/type/fyclass-fypage.html',
class_parse:'.stui-header__menu li:gt(0):lt(7);a&&Text;a&&href;/(\\d+).html',
searchUrl:'/search/**----------fypage---.html',
......
3.4.4
\ No newline at end of file
3.4.5
\ No newline at end of file
// Source rule for the 兔小贝 (tuxiaobei.com) site.
// NOTE(review): presumably loaded by the Python CMS class, which reads fields
// such as detailUrl, double and lazy — confirm against the loader.
var rule = {
title:'兔小贝',
host:'https://www.tuxiaobei.com',
homeUrl:'',
// List endpoint; fyclass / fypage look like placeholders for the category id
// and page number (substitution code not visible here — TODO confirm).
// The trailing empty `callback=` suggests a JSONP-style endpoint.
url:'/list/mip-data?typeId=fyclass&page=fypage&callback=',
// Detail page template; the loader replaces `fyid` with the item id.
detailUrl:'/play/fyid',
searchUrl:'/search/index?key=**',
headers:{
// Symbolic UA name rather than a literal string. The loader swaps names such
// as 'UC_UA' / 'IOS_UA' for the real User-Agent constants; presumably
// 'MOBILE_UA' is handled the same way — verify.
'User-Agent':'MOBILE_UA'
},
timeout:5000,
// Static categories: class_url and class_name are '&'-separated lists with
// the same number of entries (presumably paired by position — confirm).
class_url:'2&3&4&25',
class_name:'儿歌&故事&国学&启蒙',
//class_name:'#page-viewport&&ul&&li;.text&&Text;a&&href;/(.*)',
cate_exclude:'应用',
推荐:'.pic-list.list-box;.items;.text&&Text;mip-img&&src;.all&&Text;a&&href',
// When truthy the loader does a two-level selection: outer containers first,
// then the items inside each container.
double:true,
limit:5,
play_parse:true,
// 'js:' prefix: the text after it is taken as script code. The script is
// given a `jsp` parser object; here it re-fetches the page with an iOS
// User-Agent and reads the `video-src` attribute of #videoWrap as the stream URL.
lazy:'js:fetch_params.headers["user-agent"] = IOS_UA;let html=fetch(input,fetch_params);let src = jsp.pdfh(html,"body&&#videoWrap&&video-src");input=src;',
// 一级:'json:data.items;name;image;collect_num;category_id+video_id',
// 'json:' prefix makes the loader use its JSON parsers instead of the HTML
// selector parsers for this rule.
一级:'json:data.items;name;image;duration_string;video_id',
二级:'*',
搜索:'.list-con&&.items;.text&&Text;mip-img&&src;.time&&Text;a&&href',
searchable:1,
quickSearch:0,
}
\ No newline at end of file
......@@ -48,6 +48,7 @@
[获取本地设备信息](https://m.jb51.net/article/140716.htm)
###### 2022/09/10
- [X] 1.升级至3.4.4.增加小强迷源,增加二级重定向属性(提供重定向后的源码,让代码重新取重定向过后的线路和播放列表)
- [X] 2.升级至3.4.5.增加兔小贝儿歌源,优化json:细节处理以及详情页拼接细节
###### 2022/09/09
- [X] 1.增加西瓜源,修复一级不支持lazy的bug
- [X] 2.兄弟们dockerhub没法push镜像不知道咋回事,3.4.1的镜像自己用docker目录下的文件build吧
......
......@@ -174,7 +174,7 @@ def base_request(url,obj):
method = 'get'
obj['method'] = 'method'
# print(obj)
print(f'{method}:{url}')
print(f"{method}:{url}:{obj['headers']}")
try:
# r = requests.get(url, headers=headers, params=body, timeout=timeout)
if method.lower() == 'get':
......@@ -194,7 +194,7 @@ def base_request(url,obj):
def fetch(url, obj):
    """Send a request via ``base_request``, filling in a default User-Agent.

    ``obj`` is first passed through ``dealObj`` (defined elsewhere in this
    file). If the resulting options carry no ``User-Agent`` header, one is
    added: a lower-case ``user-agent`` entry is reused when present,
    otherwise ``PC_UA`` is used.

    Args:
        url: Target URL.
        obj: Request options; expected to be dict-like after ``dealObj``.

    Returns:
        Whatever ``base_request(url, obj)`` returns.
    """
    obj = dealObj(obj)
    # Tolerate a missing or None ``headers`` entry instead of indexing into it.
    headers = obj.get('headers') or {}
    obj['headers'] = headers
    if not headers.get('User-Agent'):
        # Honour a caller-supplied lower-case key before falling back to PC_UA.
        headers['User-Agent'] = headers.get('user-agent', PC_UA)
    return base_request(url, obj)
def post(url,obj):
......@@ -206,7 +206,7 @@ def request(url,obj):
obj = dealObj(obj)
# print(f'{method}:{url}')
if not obj.get('headers') or not obj['headers'].get('User-Agent'):
obj['headers']['User-Agent'] = UC_UA
obj['headers']['User-Agent'] = obj['headers'].get('user-agent',UC_UA)
return base_request(url, obj)
......
......@@ -28,13 +28,13 @@ class jsoup:
option = parse.split('&&')[-1]
parse = parse.split('&&')[:-1] # 如果只有一个&& 取的就直接是0
if len(parse) > 1: # 如果不大于1可能就是option操作,不需要拼eq
parse = ' '.join([i if self.test(':eq|:lt|:gt',i) else f'{i}:eq(0)' for i in parse])
parse = ' '.join([i if self.test(':eq|:lt|:gt|#',i) else f'{i}:eq(0)' for i in parse])
else:
parse = parse[0] if self.test(':eq|:lt|:gt',parse[0]) else f'{parse[0]}:eq(0)'
parse = parse[0] if self.test(':eq|:lt|:gt|#',parse[0]) else f'{parse[0]}:eq(0)'
if option:
# print(f'parse:{parse}=>(option:{option})')
ret = doc(parse)
# print(html)
# FIXME 解析出来有多个的情况应该自动取第一个
if option == 'Text':
ret = ret.text()
......
......@@ -96,6 +96,7 @@ def copy_to_update():
# print(f'升级失败,找不到目录{dr_path}')
logger.info(f'升级失败,找不到目录{dr_path}')
return False
# 千万不能覆盖super,base
paths = ['js','models','controllers','libs','static','templates','utils','txt']
for path in paths:
force_copy_files(os.path.join(dr_path, path),os.path.join(base_path, path))
......
......@@ -14,6 +14,7 @@ MOBILE_UA = 'Mozilla/5.0 (Linux; Android 11; M2007J3SC Build/RKQ1.200826.002; wv
PC_UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36'
UA = 'Mozilla/5.0'
UC_UA = 'Mozilla/5.0 (Linux; U; Android 9; zh-CN; MI 9 Build/PKQ1.181121.001) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.108 UCBrowser/12.5.5.1035 Mobile Safari/537.36'
IOS_UA = 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
headers = {
'Referer': 'https://www.baidu.com',
'user-agent': UA,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册