提交 ec2cfa5b 编写于 作者: H hjdhnx

555影视增加了首页源和限制条数写法

上级 c4ca5a47
...@@ -19,7 +19,7 @@ from utils.web import * ...@@ -19,7 +19,7 @@ from utils.web import *
rule_list = getRules() rule_list = getRules()
print(rule_list) print(rule_list)
def getParmas(key=None): def getParmas(key=None,value=''):
""" """
获取链接参数 获取链接参数
:param key: :param key:
...@@ -31,7 +31,7 @@ def getParmas(key=None): ...@@ -31,7 +31,7 @@ def getParmas(key=None):
elif request.method == 'GET': elif request.method == 'GET':
args = request.args args = request.args
if key: if key:
return args.get(key,'') return args.get(key,value)
else: else:
return args return args
...@@ -66,7 +66,8 @@ def vod(): ...@@ -66,7 +66,8 @@ def vod():
flag = getParmas('flag') flag = getParmas('flag')
filter = getParmas('filter') filter = getParmas('filter')
t = getParmas('t') t = getParmas('t')
pg = getParmas('pg') pg = getParmas('pg','1')
pg = int(pg)
ids = getParmas('ids') ids = getParmas('ids')
q = getParmas('q') q = getParmas('q')
...@@ -75,7 +76,10 @@ def vod(): ...@@ -75,7 +76,10 @@ def vod():
# print(data) # print(data)
return jsonify(data) return jsonify(data)
if ac and ids: # 二级 if ac and ids: # 二级
data = cms.detailContent(ids.split(',')) id_list = ids.split(',')
# print(len(id_list))
# print(id_list)
data = cms.detailContent(pg,id_list)
# print(data) # print(data)
return jsonify(data) return jsonify(data)
if wd: # 搜索 if wd: # 搜索
...@@ -84,7 +88,7 @@ def vod(): ...@@ -84,7 +88,7 @@ def vod():
return jsonify(data) return jsonify(data)
# return jsonify({'rule':rule,'js_code':js_code}) # return jsonify({'rule':rule,'js_code':js_code})
home_data = cms.homeContent() home_data = cms.homeContent(pg)
return jsonify(home_data) return jsonify(home_data)
@app.route('/clear') @app.route('/clear')
......
...@@ -10,6 +10,9 @@ var rule = { ...@@ -10,6 +10,9 @@ var rule = {
}, },
class_name:'电影&连续剧&福利&动漫&综艺', class_name:'电影&连续剧&福利&动漫&综艺',
class_url:'1&2&124&4&3', class_url:'1&2&124&4&3',
limit:10,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href', 一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"}, 二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text', 搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
......
...@@ -5,20 +5,24 @@ ...@@ -5,20 +5,24 @@
# Date : 2022/8/25 # Date : 2022/8/25
import requests import requests
import re import re
import math
from utils.web import * from utils.web import *
from utils.config import config from utils.config import config
from utils.htmlParser import jsoup from utils.htmlParser import jsoup
from urllib.parse import urljoin from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor # 引入线程池
class CMS: class CMS:
def __init__(self,rule): def __init__(self,rule):
host = rule.get('host','').rstrip('/') host = rule.get('host','').rstrip('/')
timeout = rule.get('timeout',2000) timeout = rule.get('timeout',5000)
homeUrl = rule.get('homeUrl','/') homeUrl = rule.get('homeUrl','/')
url = rule.get('url','') url = rule.get('url','')
detailUrl = rule.get('detailUrl','') detailUrl = rule.get('detailUrl','')
searchUrl = rule.get('searchUrl','') searchUrl = rule.get('searchUrl','')
headers = rule.get('headers',{}) headers = rule.get('headers',{})
limit = rule.get('limit',6)
self.limit = min(limit,20)
keys = headers.keys() keys = headers.keys()
for k in headers.keys(): for k in headers.keys():
if str(k).lower() == 'user-agent': if str(k).lower() == 'user-agent':
...@@ -45,9 +49,11 @@ class CMS: ...@@ -45,9 +49,11 @@ class CMS:
self.class_name = rule.get('class_name','') self.class_name = rule.get('class_name','')
self.class_url = rule.get('class_url','') self.class_url = rule.get('class_url','')
self.class_parse = rule.get('class_parse','') self.class_parse = rule.get('class_parse','')
self.double = rule.get('double',False)
self.一级 = rule.get('一级','') self.一级 = rule.get('一级','')
self.二级 = rule.get('二级','') self.二级 = rule.get('二级','')
self.搜索 = rule.get('搜索','') self.搜索 = rule.get('搜索','')
self.推荐 = rule.get('推荐','')
self.title = rule.get('title','') self.title = rule.get('title','')
self.timeout = round(int(timeout)/1000,2) self.timeout = round(int(timeout)/1000,2)
self.filter = rule.get('filter',[]) self.filter = rule.get('filter',[])
...@@ -100,11 +106,12 @@ class CMS: ...@@ -100,11 +106,12 @@ class CMS:
pq = jsp.pq pq = jsp.pq
return pdfh,pdfa,pd,pq return pdfh,pdfa,pd,pq
def homeContent(self): def homeContent(self,fypage=1):
# yanaifei # yanaifei
# https://yanetflix.com/vodtype/dianying.html # https://yanetflix.com/vodtype/dianying.html
result = {} result = {}
classes = [] classes = []
video_result = self.blank()
if self.class_url and self.class_name: if self.class_url and self.class_name:
class_names = self.class_name.split('&') class_names = self.class_name.split('&')
...@@ -116,41 +123,104 @@ class CMS: ...@@ -116,41 +123,104 @@ class CMS:
'type_id': class_urls[i] 'type_id': class_urls[i]
}) })
# print(self.url) # print(self.url)
if self.homeUrl.startswith('http') and self.class_parse: if self.homeUrl.startswith('http'):
# print(self.homeUrl) # print(self.homeUrl)
# print(self.class_parse) # print(self.class_parse)
try: try:
r = requests.get(self.homeUrl,headers=self.headers,timeout=self.timeout) r = requests.get(self.homeUrl,headers=self.headers,timeout=self.timeout)
r.encoding = r.apparent_encoding
html = r.text html = r.text
p = self.class_parse.split(';') if self.class_parse:
jsp = jsoup(self.url) p = self.class_parse.split(';')
pdfh = jsp.pdfh jsp = jsoup(self.url)
pdfa = jsp.pdfa pdfh = jsp.pdfh
pd = jsp.pd pdfa = jsp.pdfa
items = pdfa(html,p[0]) pd = jsp.pd
for item in items: items = pdfa(html,p[0])
title = pdfh(item, p[1]) for item in items:
url = pd(item, p[2]) title = pdfh(item, p[1])
tag = url url = pd(item, p[2])
if len(p) > 3 and p[3].strip(): tag = url
tag = self.regexp(p[3].strip(),url,0) if len(p) > 3 and p[3].strip():
classes.append({ tag = self.regexp(p[3].strip(),url,0)
'type_name': title, classes.append({
'type_id': tag 'type_name': title,
}) 'type_id': tag
})
video_result = self.homeVideoContent(html,fypage)
except Exception as e: except Exception as e:
print(e) print(e)
result['class'] = classes result['class'] = classes
if self.filter: if self.filter:
result['filters'] = config['filter'] result['filters'] = config['filter']
result.update(video_result)
return result return result
def homeVideoContent(self,html,fypage=1):
    """
    Build the home-page recommendation list from already-fetched html.

    The rule string ``self.推荐`` is split on ';'. In single-layer mode the
    fields are ``list;title;img;desc;link[;content]``. When ``self.double``
    is truthy, one extra selector (the inner list) follows the outer list
    selector, so every later field index shifts by one.

    :param html: html text of the home page
    :param fypage: page number, echoed back unchanged in the result
    :return: dict with list/code/msg/page/pagecount/limit/total, or
             ``self.blank()`` when no rule is configured, the rule has too
             few fields, or extraction raises
    """
    if not self.推荐:
        return self.blank()

    p = self.推荐.split(';')  # rule fields
    # Double-layer rules carry one extra selector right after p[0].
    offset = 1 if self.double else 0
    if len(p) < 5 + offset:
        return self.blank()

    jsp = jsoup(self.homeUrl)
    pdfh = jsp.pdfh
    pdfa = jsp.pdfa
    pd = jsp.pd
    try:
        nodes = pdfa(html, p[0])
        if self.double:
            # Flatten outer containers into their inner items, lazily, so the
            # helper calls happen in the same order as nested loops would.
            nodes = (inner for outer in nodes for inner in pdfa(outer, p[1]))
        has_content = len(p) > 5 + offset  # trailing content field is optional
        videos = []
        for node in nodes:
            title = pdfh(node, p[1 + offset])
            img = pd(node, p[2 + offset])
            desc = pdfh(node, p[3 + offset])
            link = pd(node, p[4 + offset])
            content = pdfh(node, p[5 + offset]) if has_content else ''
            videos.append({
                "vod_id": link,
                "vod_name": title,
                "vod_pic": img,
                "vod_remarks": desc,
                "vod_content": content,
                "type_id": 1,
                "type_name": "首页推荐",
            })
        total = len(videos)
        # A non-positive limit would divide by zero and throw away every
        # parsed video through the except below; report one page instead.
        pagecount = math.ceil(total / self.limit) if self.limit > 0 else 1
        return {
            'list': videos,
            'code': 1,
            'msg': '数据列表',
            'page': fypage,
            'pagecount': pagecount,
            'limit': self.limit,
            'total': total,
        }
    except Exception as e:
        print(f'首页内容获取失败:{e}')
        return self.blank()
def categoryContent(self, fyclass, fypage): def categoryContent(self, fyclass, fypage):
""" """
...@@ -175,6 +245,7 @@ class CMS: ...@@ -175,6 +245,7 @@ class CMS:
if fypage == 1 and self.test('[\[\]]',url): if fypage == 1 and self.test('[\[\]]',url):
url = url.split('[')[1].split(']')[0] url = url.split('[')[1].split(']')[0]
r = requests.get(url, headers=self.headers,timeout=self.timeout) r = requests.get(url, headers=self.headers,timeout=self.timeout)
r.encoding = r.apparent_encoding
print(r.url) print(r.url)
p = self.一级.split(';') # 解析 p = self.一级.split(';') # 解析
if len(p) < 5: if len(p) < 5:
...@@ -207,25 +278,20 @@ class CMS: ...@@ -207,25 +278,20 @@ class CMS:
result['list'] = videos result['list'] = videos
result['page'] = fypage result['page'] = fypage
result['pagecount'] = 9999 result['pagecount'] = 9999
result['limit'] = 90 result['limit'] = 9999
result['total'] = 999999 result['total'] = 999999
return result return result
def detailContent(self, array): def detailOneVod(self,id):
""" detailUrl = str(id)
cms二级数据 vod = {}
:param array:
:return:
"""
# video-info-header
detailUrl = str(array[0])
print(detailUrl)
if not detailUrl.startswith('http'): if not detailUrl.startswith('http'):
url = self.detailUrl.replace('fyid', detailUrl) url = self.detailUrl.replace('fyid', detailUrl)
else: else:
url = detailUrl url = detailUrl
print(url) # print(url)
r = requests.get(url, headers=self.headers,timeout=self.timeout) r = requests.get(url, headers=self.headers,timeout=self.timeout)
r.encoding = r.apparent_encoding
html = r.text html = r.text
# print(html) # print(html)
p = self.二级 # 解析 p = self.二级 # 解析
...@@ -236,15 +302,10 @@ class CMS: ...@@ -236,15 +302,10 @@ class CMS:
vod['vod_actor'] = '没有二级,只有一级链接直接嗅探播放' vod['vod_actor'] = '没有二级,只有一级链接直接嗅探播放'
vod['content'] = detailUrl vod['content'] = detailUrl
vod['vod_play_url'] = '嗅探播放$'+detailUrl vod['vod_play_url'] = '嗅探播放$'+detailUrl
result = { return vod
'list': [
vod
]
}
return result
if not isinstance(p,dict): if not isinstance(p,dict):
return self.blank() return vod
jsp = jsoup(self.url) jsp = jsoup(self.url)
pdfh = jsp.pdfh pdfh = jsp.pdfh
...@@ -313,10 +374,24 @@ class CMS: ...@@ -313,10 +374,24 @@ class CMS:
vod['vod_play_from'] = vod_play_from vod['vod_play_from'] = vod_play_from
vod['vod_play_url'] = vod_play_url vod['vod_play_url'] = vod_play_url
return vod
def detailContent(self, fypage, array):
    """
    Fetch detail (second-level) data for one page of vod ids concurrently.

    :param fypage: 1-based page number; values below 1 are treated as 1
    :param array: list of vod ids / detail urls; each is passed to
                  ``self.detailOneVod``
    :return: ``{'list': [...]}`` with one entry per id on the requested
             page, in the same order as the ids; the list is empty when
             the page holds no ids
    """
    page = max(int(fypage), 1)
    start = (page - 1) * self.limit
    page_ids = array[start:start + self.limit]
    # ThreadPoolExecutor rejects max_workers=0, so an empty page (no ids, or
    # a page past the end) must return before a pool is created.
    if not page_ids:
        return {'list': []}
    # One worker per id; the slice above already caps this at self.limit.
    # The with-block waits for all workers and releases them even if a
    # lookup raises.
    with ThreadPoolExecutor(max_workers=len(page_ids)) as pool:
        vod_list = list(pool.map(self.detailOneVod, page_ids))
    return {'list': vod_list}
...@@ -327,6 +402,7 @@ class CMS: ...@@ -327,6 +402,7 @@ class CMS:
url = self.searchUrl.replace('**', key).replace('fypage',pg) url = self.searchUrl.replace('**', key).replace('fypage',pg)
print(url) print(url)
r = requests.get(url, headers=self.headers) r = requests.get(url, headers=self.headers)
r.encoding = r.apparent_encoding
html = r.text html = r.text
if not self.搜索: if not self.搜索:
return self.blank() return self.blank()
...@@ -364,13 +440,13 @@ class CMS: ...@@ -364,13 +440,13 @@ class CMS:
if __name__ == '__main__': if __name__ == '__main__':
from utils import parser from utils import parser
# js_path = f'js/玩偶姐姐.js' # js_path = f'js/玩偶姐姐.js'
js_path = f'js/蓝莓影视.js' js_path = f'js/555影视.js'
ctx, js_code = parser.runJs(js_path) ctx, js_code = parser.runJs(js_path)
rule = ctx.eval('rule') rule = ctx.eval('rule')
cms = CMS(rule) cms = CMS(rule)
print(cms.title) print(cms.title)
print(cms.homeContent()) print(cms.homeContent())
print(cms.categoryContent('20',1)) # print(cms.categoryContent('20',1))
# print(cms.categoryContent('latest',1)) # print(cms.categoryContent('latest',1))
# print(cms.detailContent(['https://hongkongdollvideo.com/video/b22c7cb6df40a3c4.html'])) # print(cms.detailContent(['https://hongkongdollvideo.com/video/b22c7cb6df40a3c4.html']))
# cms.categoryContent('dianying',1) # cms.categoryContent('dianying',1)
......
...@@ -29,7 +29,7 @@ var rule = { ...@@ -29,7 +29,7 @@ var rule = {
'User-Agent':'MOBILE_UA', 'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok" "Cookie": "searchneed=ok"
}, },
timeout:5000,//网站的全局请求超时,默认是2000毫秒 timeout:5000,//网站的全局请求超时,默认是3000毫秒
//动态分类获取 列表;标题;链接;正则提取 不需要正则的时候后面别加分号 //动态分类获取 列表;标题;链接;正则提取 不需要正则的时候后面别加分号
class_parse:'#side-menu:lt(1) li;a&&Text;a&&href;com/(.*?)/', class_parse:'#side-menu:lt(1) li;a&&Text;a&&href;com/(.*?)/',
// 类似海阔一级 列表;标题;图片;描述;链接;详情 其中最后一个参数选填 // 类似海阔一级 列表;标题;图片;描述;链接;详情 其中最后一个参数选填
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册