提交 3253a1ae 编写于 作者: H hjdhnx

增加了更多的几个源

上级 f089e04e
...@@ -13,6 +13,7 @@ from utils.log import logger ...@@ -13,6 +13,7 @@ from utils.log import logger
from utils.htmlParser import jsoup from utils.htmlParser import jsoup
from urllib.parse import urljoin from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor # 引入线程池 from concurrent.futures import ThreadPoolExecutor # 引入线程池
from time import time
class CMS: class CMS:
def __init__(self,rule,db=None,RuleClass=None): def __init__(self,rule,db=None,RuleClass=None):
...@@ -27,7 +28,7 @@ class CMS: ...@@ -27,7 +28,7 @@ class CMS:
headers = rule.get('headers',{}) headers = rule.get('headers',{})
limit = rule.get('limit',6) limit = rule.get('limit',6)
encoding = rule.get('编码', 'utf-8') encoding = rule.get('编码', 'utf-8')
self.limit = min(limit,20) self.limit = min(limit,30)
keys = headers.keys() keys = headers.keys()
for k in headers.keys(): for k in headers.keys():
if str(k).lower() == 'user-agent': if str(k).lower() == 'user-agent':
...@@ -132,7 +133,7 @@ class CMS: ...@@ -132,7 +133,7 @@ class CMS:
cls2 = res.class_url.split('&') cls2 = res.class_url.split('&')
classes = [{'type_name':cls[i],'type_id':cls2[i]} for i in range(len(cls))] classes = [{'type_name':cls[i],'type_id':cls2[i]} for i in range(len(cls))]
# _logger.info(classes) # _logger.info(classes)
logger.info(f"使用缓存分类:{classes}") logger.info(f"{self.getName()}使用缓存分类:{classes}")
return classes return classes
else: else:
return [] return []
...@@ -149,21 +150,21 @@ class CMS: ...@@ -149,21 +150,21 @@ class CMS:
# self.db.metadata.clear() # self.db.metadata.clear()
# RuleClass = rule_classes.init(self.db) # RuleClass = rule_classes.init(self.db)
res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first() res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
print(res) # print(res)
if res: if res:
res.class_name = class_name res.class_name = class_name
res.class_url = class_url res.class_url = class_url
self.db.session.add(res) self.db.session.add(res)
msg = f'修改成功:{res.id}' msg = f'{self.getName()}修改成功:{res.id}'
else: else:
res = self.RuleClass(name=name, class_name=class_name, class_url=class_url) res = self.RuleClass(name=name, class_name=class_name, class_url=class_url)
self.db.session.add(res) self.db.session.add(res)
res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first() res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
msg = f'新增成功:{res.id}' msg = f'{self.getName()}新增成功:{res.id}'
try: try:
self.db.session.commit() self.db.session.commit()
print(msg) logger.info(msg)
except Exception as e: except Exception as e:
return f'发生了错误:{e}' return f'发生了错误:{e}'
...@@ -171,6 +172,7 @@ class CMS: ...@@ -171,6 +172,7 @@ class CMS:
def homeContent(self,fypage=1): def homeContent(self,fypage=1):
# yanaifei # yanaifei
# https://yanetflix.com/vodtype/dianying.html # https://yanetflix.com/vodtype/dianying.html
t1 = time()
result = {} result = {}
classes = [] classes = []
video_result = self.blank() video_result = self.blank()
...@@ -191,43 +193,56 @@ class CMS: ...@@ -191,43 +193,56 @@ class CMS:
# print(self.class_parse) # print(self.class_parse)
try: try:
if self.class_parse: if self.class_parse:
t3 = time()
cache_classes = self.getClasses() cache_classes = self.getClasses()
t4 = time()
logger.info(f'{self.getName()}读取缓存耗时:{round((t4-t3)*1000,2)}毫秒')
if len(cache_classes) > 0: if len(cache_classes) > 0:
classes = cache_classes classes = cache_classes
# print(cache_classes) # print(cache_classes)
has_cache = True has_cache = True
new_classes = [] # logger.info(f'是否有缓存分类:{has_cache}')
r = requests.get(self.homeUrl, headers=self.headers, timeout=self.timeout) if has_cache and not self.推荐:
r.encoding = self.encoding pass
html = r.text else:
if self.class_parse and not has_cache: new_classes = []
p = self.class_parse.split(';') r = requests.get(self.homeUrl, headers=self.headers, timeout=self.timeout)
jsp = jsoup(self.url) r.encoding = self.encoding
pdfh = jsp.pdfh html = r.text
pdfa = jsp.pdfa if self.class_parse and not has_cache:
pd = jsp.pd p = self.class_parse.split(';')
items = pdfa(html,p[0]) print(p)
for item in items: jsp = jsoup(self.url)
title = pdfh(item, p[1]) pdfh = jsp.pdfh
url = pd(item, p[2]) pdfa = jsp.pdfa
tag = url pd = jsp.pd
if len(p) > 3 and p[3].strip(): items = pdfa(html,p[0])
tag = self.regexp(p[3].strip(),url,0) print(len(items))
new_classes.append({ print(items)
'type_name': title, for item in items:
'type_id': tag title = pdfh(item, p[1])
}) url = pd(item, p[2])
if len(new_classes) > 0: print(url)
classes.extend(new_classes) tag = url
self.saveClass(classes) if len(p) > 3 and p[3].strip():
video_result = self.homeVideoContent(html,fypage) tag = self.regexp(p[3].strip(),url,0)
new_classes.append({
'type_name': title,
'type_id': tag
})
if len(new_classes) > 0:
classes.extend(new_classes)
self.saveClass(classes)
video_result = self.homeVideoContent(html,fypage)
except Exception as e: except Exception as e:
print(e) logger.info(f'{self.getName()}主页发生错误:{e}')
result['class'] = classes result['class'] = classes
if self.filter: if self.filter:
result['filters'] = config['filter'] result['filters'] = config['filter']
result.update(video_result) result.update(video_result)
t2 = time()
logger.info(f'{self.getName()}获取首页耗时:{round((t2-t1)*1000,2)}毫秒')
return result return result
def homeVideoContent(self,html,fypage=1): def homeVideoContent(self,html,fypage=1):
...@@ -291,7 +306,7 @@ class CMS: ...@@ -291,7 +306,7 @@ class CMS:
result['total'] = len(videos) result['total'] = len(videos)
return result return result
except Exception as e: except Exception as e:
print(f'首页内容获取失败:{e}') logger.info(f'首页内容获取失败:{e}')
return self.blank() return self.blank()
def categoryContent(self, fyclass, fypage): def categoryContent(self, fyclass, fypage):
...@@ -301,7 +316,7 @@ class CMS: ...@@ -301,7 +316,7 @@ class CMS:
:param fypage: 页码 :param fypage: 页码
:return: cms一级数据 :return: cms一级数据
""" """
result = {} result = {}
# urlParams = ["", "", "", "", "", "", "", "", "", "", "", ""] # urlParams = ["", "", "", "", "", "", "", "", "", "", "", ""]
# urlParams = [""] * 12 # urlParams = [""] * 12
...@@ -352,6 +367,7 @@ class CMS: ...@@ -352,6 +367,7 @@ class CMS:
result['pagecount'] = 9999 result['pagecount'] = 9999
result['limit'] = 9999 result['limit'] = 9999
result['total'] = 999999 result['total'] = 999999
return result return result
def detailOneVod(self,id): def detailOneVod(self,id):
...@@ -454,6 +470,7 @@ class CMS: ...@@ -454,6 +470,7 @@ class CMS:
:param array: :param array:
:return: :return:
""" """
t1 = time()
array = array[(fypage-1)*self.limit:min(self.limit*fypage,len(array))] array = array[(fypage-1)*self.limit:min(self.limit*fypage,len(array))]
thread_pool = ThreadPoolExecutor(min(self.limit,len(array))) # 定义线程池来启动多线程执行此任务 thread_pool = ThreadPoolExecutor(min(self.limit,len(array))) # 定义线程池来启动多线程执行此任务
obj_list = [] obj_list = []
...@@ -465,6 +482,8 @@ class CMS: ...@@ -465,6 +482,8 @@ class CMS:
result = { result = {
'list': vod_list 'list': vod_list
} }
t2 = time()
logger.info(f'{self.getName()}获取详情页耗时:{round((t2-t1)*1000,2)}毫秒')
return result return result
def searchContent(self, key, fypage=1): def searchContent(self, key, fypage=1):
...@@ -472,7 +491,7 @@ class CMS: ...@@ -472,7 +491,7 @@ class CMS:
if not self.searchUrl: if not self.searchUrl:
return self.blank() return self.blank()
url = self.searchUrl.replace('**', key).replace('fypage',pg) url = self.searchUrl.replace('**', key).replace('fypage',pg)
print(url) logger.info(f'{self.getName()}搜索链接:{url}')
r = requests.get(url, headers=self.headers) r = requests.get(url, headers=self.headers)
r.encoding = self.encoding r.encoding = self.encoding
html = r.text html = r.text
......
var rule = { var rule = {
title:'555影视', title:'555影视',
host:'https://www.5dy5.cc', host:'https://www.5dy6.cc',
// homeUrl:'/', // homeUrl:'/',
url:'/vodshow/fyclass--------fypage---.html', url:'/vodshow/fyclass--------fypage---.html',
searchUrl:'/vodsearch/**----------fypage---.html', searchUrl:'/vodsearch/**----------fypage---.html',
...@@ -8,9 +8,10 @@ var rule = { ...@@ -8,9 +8,10 @@ var rule = {
'User-Agent':'MOBILE_UA', 'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok" "Cookie": "searchneed=ok"
}, },
class_name:'电影&连续剧&福利&动漫&综艺', // class_name:'电影&连续剧&福利&动漫&综艺',
class_url:'1&2&124&4&3', // class_url:'1&2&124&4&3',
limit:6, class_parse:'.navbar-items li:gt(2):lt(8);a&&Text;a&&href;/(\\d+).html',
limit:10,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位 double:true, // 推荐内容是否双层定位
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href', 一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
......
// Source rule for the "夜空" site: a plain object literal consumed by the CMS loader.
// String rules below are ';'-separated parse expressions; '&&' separates a selector
// from the attribute/Text to extract (presumably — confirm against the htmlParser helper).
var rule = {
title:'夜空',
host:'https://www.yekong.cc',
// homeUrl:'/',
// Category listing path; fyclass / fypage look like placeholders for the
// category id and page number — TODO confirm against the loader.
url:'/pianku-fyclass--------fypage---/',
// Search path: the loader replaces '**' with the search keyword and 'fypage' with the page.
searchUrl:'/search-**----------fypage---/',
headers:{// request headers for the site; all headers are supported, usually carries UA and cookies
// NOTE(review): 'MOBILE_UA' appears to be a placeholder resolved by the loader — confirm.
'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok"
},
// Static category names / ids, disabled in favour of class_parse below.
// NOTE(review): these values look copied from another rule file — verify before re-enabling.
// class_name:'电影&连续剧&福利&动漫&综艺',
// class_url:'1&2&124&4&3',
// Dynamic category discovery, split on ';' by the loader:
//   [0] selector for the category items, [1] category title, [2] category link,
//   [3] optional regex applied to the link to derive the stored type_id.
class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;.*v/(.*?)/',
// Page size; the loader in this commit defaults it to 6 and caps it at 30.
limit:10,
// Home-page recommendation parse rule.
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // whether the recommendation content uses two-level locating
// Level-one (category list) parse rule.
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
// Level-two (detail page) parse rules, keyed by the field being extracted.
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
// Search-result parse rule.
搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
}
\ No newline at end of file
// Source rule for the "瓜皮TV" site: a plain object literal consumed by the CMS loader.
// String rules below are ';'-separated parse expressions; '&&' separates a selector
// from the attribute/Text to extract (presumably — confirm against the htmlParser helper).
var rule = {
title:'瓜皮TV',
host:'https://guapitv.xyz',
// homeUrl:'/',
// Category listing path; fyclass / fypage look like placeholders for the
// category id and page number — TODO confirm against the loader.
url:'/vodshow/fyclass--------fypage---.html',
// Search path: the loader replaces '**' with the search keyword and 'fypage' with the page.
searchUrl:'/vodsearch/**----------fypage---.html',
headers:{// request headers for the site; all headers are supported, usually carries UA and cookies
// NOTE(review): 'MOBILE_UA' appears to be a placeholder resolved by the loader — confirm.
'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok"
},
// Static category names / ids, disabled in favour of class_parse below.
// NOTE(review): these values look copied from another rule file — verify before re-enabling.
// class_name:'电影&连续剧&福利&动漫&综艺',
// class_url:'1&2&124&4&3',
// Dynamic category discovery, split on ';' by the loader:
//   [0] selector for the category items, [1] category title, [2] category link,
//   [3] optional regex applied to the link to derive the stored type_id.
class_parse:'.navbar-items li:gt(1):lt(8);a&&Text;a&&href;.*-(.*?).html',
// Page size; the loader in this commit defaults it to 6 and caps it at 30.
limit:10,
// Home-page recommendation parse rule.
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // whether the recommendation content uses two-level locating
// Level-one (category list) parse rule.
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
// Level-two (detail page) parse rules, keyed by the field being extracted.
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
// Search-result parse rule.
搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
}
\ No newline at end of file
// Source rule for the "莫扎兔" site: a plain object literal consumed by the CMS loader.
// String rules below are ';'-separated parse expressions; '&&' separates a selector
// from the attribute/Text to extract (presumably — confirm against the htmlParser helper).
var rule = {
title:'莫扎兔',
host:'https://www.mozhatu.com',
// homeUrl:'/',
// Category listing path; fyclass / fypage look like placeholders for the
// category id and page number — TODO confirm against the loader.
url:'/index.php/vod/show/id/fyclass/page/fypage.html',
// Search path: the loader replaces '**' with the search keyword and 'fypage' with the page.
// NOTE(review): this keeps the /vodsearch/ pattern while `url` uses the /index.php/ style,
// and the search parse rule at the bottom is disabled — verify search works for this site.
searchUrl:'/vodsearch/**----------fypage---.html',
headers:{// request headers for the site; all headers are supported, usually carries UA and cookies
// NOTE(review): 'MOBILE_UA' appears to be a placeholder resolved by the loader — confirm.
'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok"
},
// Static category names / ids, disabled in favour of class_parse below.
// NOTE(review): these values look copied from another rule file — verify before re-enabling.
// class_name:'电影&连续剧&福利&动漫&综艺',
// class_url:'1&2&124&4&3',
// Dynamic category discovery, split on ';' by the loader:
//   [0] selector for the category items, [1] category title, [2] category link,
//   [3] optional regex applied to the link to derive the stored type_id.
class_parse:'.navbar-items li:gt(2):lt(8);a&&Text;a&&href;.*/(.*?).html',
// Page size; the loader in this commit defaults it to 6 and caps it at 30.
limit:10,
// Home-page recommendation parse rule.
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // whether the recommendation content uses two-level locating
// Level-one (category list) parse rule.
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
// Level-two (detail page) parse rules, keyed by the field being extracted.
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
// Search-result parse rule, currently disabled for this site.
//搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
}
\ No newline at end of file
...@@ -6,11 +6,11 @@ var rule = { ...@@ -6,11 +6,11 @@ var rule = {
headers:{ headers:{
'User-Agent':'MOBILE_UA' 'User-Agent':'MOBILE_UA'
}, },
searchUrl:'/vodsearch/**----------fypage---.html', searchUrl:'/search/**-fypage.html',
// class_name:'电影&网剧&剧集&动漫&综艺&记录', // class_name:'电影&网剧&剧集&动漫&综艺&记录',
// class_url:'20&1&2&3&4&23', // class_url:'20&1&2&3&4&23',
class_parse:'.navbar-items li:gt(1):lt(8);a&&Text;a&&href;/(\\d+).html', class_parse:'.navbar-items li:gt(1):lt(8);a&&Text;a&&href;/(\\d+).html',
limit:5, limit:30,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位 double:true, // 推荐内容是否双层定位
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href', 一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
......
...@@ -5,8 +5,11 @@ var rule = { ...@@ -5,8 +5,11 @@ var rule = {
url:'/vodshow/fyclass--------fypage---.html', url:'/vodshow/fyclass--------fypage---.html',
detailUrl:'https://yanetflix.com/voddetail/fyid.html',//非必填 detailUrl:'https://yanetflix.com/voddetail/fyid.html',//非必填
searchUrl:'/vodsearch/**----------fypage---.html', searchUrl:'/vodsearch/**----------fypage---.html',
class_name:'电影&连续剧&综艺&动漫', // class_name:'电影&连续剧&综艺&动漫',
class_url:'dianying&lianxuju&zongyi&dongman', // class_url:'dianying&lianxuju&zongyi&dongman',
class_parse:'.navbar-items li:gt(1):lt(6);a&&Text;a&&href;.*/(.*?).html',
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href', 一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"}, 二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text', 搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
......
无法预览此类型文件
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册