提交 50b82d76 编写于 作者: H hjdhnx

把相同的模板都进行了继承写法

新增了影视工厂和在线之家
修复了cokemv和莫扎兔等要搜索验证的网站
上级 406a21fc
......@@ -7,6 +7,7 @@ import random
from utils.encode import base64Encode
import js2py
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate
import config
# import settings
import warnings
......@@ -32,7 +33,7 @@ print('自定义播放解析地址:',app.config.get('PLAY_URL'))
print('当前操作系统',sys.platform)
app.logger.name="drLogger"
db = SQLAlchemy(app)
migrate = Migrate(app, db)
rule_list = getRuleLists()
logger.info(rule_list)
logger.info(f'局域网: {getHost(1, 5705)}/index\n本地: {getHost(0, 5705)}/index')
......@@ -104,6 +105,7 @@ def vod():
# rule = ctx.eval('rule')
ruleDict = ctx.rule.to_dict()
ruleDict['id'] = rule # 把路由请求的id装到字典里,后面播放嗅探才能用
# print(ruleDict)
# print(rule)
# print(type(rule))
# print(ruleDict)
......
......@@ -11,7 +11,7 @@ from utils.web import *
from models import *
from utils.config import config
from utils.log import logger
from utils.encode import base64Encode,baseDecode,fetch,post,request,getCryptoJS,getPreJs,buildUrl
from utils.encode import base64Encode,baseDecode,fetch,post,request,getCryptoJS,getPreJs,buildUrl,getHome,verifyCode
from utils.safePython import safePython
from utils.parser import runPy,runJScode
from utils.htmlParser import jsoup
......@@ -23,7 +23,7 @@ from easydict import EasyDict as edict
py_ctx = {
'requests':requests,'print':print,'base64Encode':base64Encode,'baseDecode':baseDecode,
'log':logger.info,'fetch':fetch,'post':post,'request':request,'getCryptoJS':getCryptoJS,
'buildUrl':buildUrl
'buildUrl':buildUrl,'getHome':getHome,'verifyCode':verifyCode
}
# print(getCryptoJS())
......@@ -35,12 +35,19 @@ class CMS:
self.id = rule.get('id', self.title)
self.lazy = rule.get('lazy', False)
self.play_disable = new_conf.get('PLAY_DISABLE',False)
self.retry_count = new_conf.get('RETRY_CNT',3)
self.lazy_mode = new_conf.get('LAZYPARSE_MODE')
self.vod = redirect(url_for('vod')).headers['Location']
try:
self.vod = redirect(url_for('vod')).headers['Location']
except:
self.vod = '/vod'
# if not self.play_disable and self.lazy:
if not self.play_disable:
self.play_parse = rule.get('play_parse', False)
play_url = getHost(self.lazy_mode)
try:
play_url = getHost(self.lazy_mode)
except:
play_url = getHost(1,5705)
# play_url = new_conf.get('PLAY_URL',getHost(2))
if not play_url.startswith('http'):
play_url = 'http://'+play_url
......@@ -65,6 +72,10 @@ class CMS:
detailUrl = rule.get('detailUrl','')
searchUrl = rule.get('searchUrl','')
headers = rule.get('headers',{})
cookie = self.getCookie()
# print(f'{self.title}cookie:{cookie}')
if cookie:
headers['cookie'] = cookie
limit = rule.get('limit',6)
encoding = rule.get('编码', 'utf-8')
self.limit = min(limit,30)
......@@ -81,6 +92,8 @@ class CMS:
lower_keys = list(map(lambda x:x.lower(),keys))
if not 'user-agent' in lower_keys:
headers['User-Agent'] = UA
if not 'referer' in lower_keys:
headers['Referer'] = host
self.headers = headers
self.host = host
self.homeUrl = urljoin(host,homeUrl) if host and homeUrl else homeUrl
......@@ -180,6 +193,8 @@ class CMS:
res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
# _logger.info('xxxxxx')
if res:
if not all([res.class_name,res.class_url]):
return []
cls = res.class_name.split('&')
cls2 = res.class_url.split('&')
classes = [{'type_name':cls[i],'type_id':cls2[i]} for i in range(len(cls))]
......@@ -189,6 +204,37 @@ class CMS:
else:
return []
def getCookie(self):
    """Look up the cookie string stored in the database for this rule.

    Returns:
        False (after printing a notice) when no database handle is set,
        None when the rule has no row or its cookie column is empty,
        otherwise the stored cookie value.
    """
    rule_name = self.getName()
    if not self.db:
        print(f'{rule_name}未提供数据库连接')
        return False
    row = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == rule_name).first()
    # A missing row and an empty cookie column both collapse to None.
    return (row.cookie or None) if row else None
def saveCookie(self,cookie):
    """Store ``cookie`` for this rule, creating the database row if needed.

    Args:
        cookie: cookie string to persist on the rule's row.

    Returns:
        False when no database handle is set, an error-message string when
        the commit fails, and None on success.
    """
    name = self.getName()
    if not self.db:
        msg = f'{name}未提供数据库连接'
        print(msg)
        return False
    session = self.db.session
    res = session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
    if res:
        res.cookie = cookie
    else:
        res = self.RuleClass(name=name, cookie=cookie)
    session.add(res)
    try:
        session.commit()
    except Exception as e:
        # Roll back so the failed transaction does not leave the session
        # unusable for the queries that follow.
        session.rollback()
        return f'保存cookie发生了错误:{e}'
    else:
        logger.info(f'{name}已保存cookie:{cookie}')
def saveClass(self, classes):
if not self.db:
msg = '未提供数据库连接'
......@@ -305,18 +351,18 @@ class CMS:
html = r.text
if self.class_parse and not has_cache:
p = self.class_parse.split(';')
print(p)
# print(p)
jsp = jsoup(self.url)
pdfh = jsp.pdfh
pdfa = jsp.pdfa
pd = jsp.pd
items = pdfa(html,p[0])
print(len(items))
print(items)
# print(len(items))
# print(items)
for item in items:
title = pdfh(item, p[1])
url = pd(item, p[2])
print(url)
# print(url)
tag = url
if len(p) > 3 and p[3].strip():
tag = self.regexp(p[3].strip(),url,0)
......@@ -626,9 +672,22 @@ class CMS:
pq = jsp.pq
videos = []
try:
r = requests.get(url, headers=self.headers)
r = requests.get(url, headers=self.headers,timeout=self.timeout)
r.encoding = self.encoding
html = r.text
# print(html)
if html.find('输入验证码') > -1:
cookie = verifyCode(url,self.headers,self.timeout,self.retry_count)
if not cookie:
return {
'list': videos
}
self.saveCookie(cookie)
self.headers['cookie'] = cookie
r = requests.get(url, headers=self.headers, timeout=self.timeout)
r.encoding = self.encoding
html = r.text
items = pdfa(html, p[0])
# print(items)
videos = []
......@@ -709,10 +768,14 @@ if __name__ == '__main__':
from utils import parser
# js_path = f'js/玩偶姐姐.js'
# js_path = f'js/555影视.js'
with open('../js/模板.js', encoding='utf-8') as f:
before = f.read()
js_path = f'js/cokemv.js'
ctx, js_code = parser.runJs(js_path)
rule = ctx.eval('rule')
cms = CMS(rule)
ctx, js_code = parser.runJs(js_path,before=before)
ruleDict = ctx.rule.to_dict()
# ruleDict['id'] = rule # 把路由请求的id装到字典里,后面播放嗅探才能用
cms = CMS(ruleDict)
print(cms.title)
print(cms.homeContent())
# print(cms.categoryContent('5',1))
......@@ -721,4 +784,4 @@ if __name__ == '__main__':
# print(cms.detailContent(1,['https://cokemv.me/voddetail/40573.html']))
# cms.categoryContent('dianying',1)
# print(cms.detailContent(['67391']))
# print(cms.searchContent('斗罗大陆'))
\ No newline at end of file
print(cms.searchContent('斗罗大陆'))
\ No newline at end of file
flask db init
flask db migrate
flask db upgrade
废弃操作仅限 flask-migrate==2.7.0:
# # 导入数据迁移核心类
# from flask_migrate import Migrate, MigrateCommand
# from flask_script import Manager
# manager = Manager(app)
# 初始化数据迁移
# migrate = Migrate(app, db)
# manager.add_command('sql', MigrateCommand)
# python app.py sql migrate -m "init"
# python app.py sql upgrade
\ No newline at end of file
......@@ -24,4 +24,5 @@ LAZYPARSE_MODE = 1 # 播放解析模式(0 本地 1 局域网 2远程 仅在全
WALL_PAPER_ENABLE = True # 启用自定义壁纸
WALL_PAPER = "https://picsum.photos/1280/720/?blur=10" # 自定义壁纸,可注释
SUP_PORT = 9001 # supervisord 服务端口
RETRY_CNT = 3 # 验证码重试次数
# {% if config.WALL_PAPER %}"wallpaper":"{{ config.WALL_PAPER }}",{% endif %}
\ No newline at end of file
// Rule for 555影视. Object.assign copies the overrides below onto muban.mxpro
// and returns that same object, so `rule` is the template plus these fields.
var rule = Object.assign(muban.mxpro,{
title:'555影视',
host:'https://www.5dy6.cc',
// Object.assign is shallow: this headers object replaces the template's
// headers as a whole rather than being merged into it.
headers:{// request headers for the site; any header is supported, usually UA and cookies
'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok"
},
searchable:1,
quickSearch:1,
});
\ No newline at end of file
......@@ -2,4 +2,6 @@ var rule = Object.assign(muban.mxpro,{
title:'cokemv',
host:'https://cokemv.me',
class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;/(\\d+).html',
searchable:1,
quickSearch:1,
});
\ No newline at end of file
// Rule for 在线之家, built on the muban.首图2 template.
// Only title and host are overridden; every other field comes from the template.
var rule = Object.assign(muban.首图2,{
title:'在线之家',
host:'https://zxzj.vip',
});
\ No newline at end of file
......@@ -3,7 +3,7 @@ title:'夜空',
host:'https://www.yekong.cc',
url:'/pianku-fyclass--------fypage---/',
searchUrl:'/search-**----------fypage---/',
searchable:0,
searchable:1,
quickSearch:0,
class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;.*v/(.*?)/',
});
\ No newline at end of file
// Rule for 影视工厂, built on the muban.首图 template.
// Overrides title and host, and sets searchable and quickSearch to 1.
var rule = Object.assign(muban.首图,{
title:'影视工厂',
host:'https://www.ysgc.cc',
searchable:1,
quickSearch:1,
});
\ No newline at end of file
......@@ -21,7 +21,7 @@ var muban = {
quickSearch:0,//是否启用快速搜索,
headers:{//网站的请求头,完整支持所有的,常带ua和cookies
'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok"
// "Cookie": "searchneed=ok"
},
class_parse:'.navbar-items li:gt(2):lt(8);a&&Text;a&&href;/(\\d+).html',
play_parse:true,
......@@ -32,6 +32,48 @@ var muban = {
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
},
首图:{
title:'',
host:'',
url:'/vodshow/fyclass--------fypage---/',
searchUrl:'/vodsearch/**----------fypage---.html',
searchable:0,//是否启用全局搜索,
quickSearch:0,//是否启用快速搜索,
headers:{//网站的请求头,完整支持所有的,常带ua和cookies
'User-Agent':'MOBILE_UA',
// "Cookie": "searchneed=ok"
},
class_parse:'.myui-header__menu li.hidden-sm:gt(0):lt(5);a&&Text;a&&href;/(\\d+).html',
play_parse:true,
lazy:'',
limit:10,
推荐:'ul.myui-vodlist.clearfix;li;a&&title;a&&data-original;.pic-text&&Text;a&&href',
double:true, // 推荐内容是否双层定位
一级:'.myui-vodlist li;a&&title;a&&data-original;.pic-text&&Text;a&&href',
二级:{"title":".myui-content__detail .title&&Text;.myui-content__detail p:eq(-2)&&Text","img":".myui-content__thumb .lazyload&&data-original","desc":".myui-content__detail p:eq(0)&&Text;.myui-content__detail p:eq(1)&&Text;.myui-content__detail p:eq(2)&&Text","content":".content&&Text","tabs":".nav-tabs:eq(0) li","lists":".myui-content__list:eq(#id) li"},
搜索:'#searchList li;a&&title;.lazyload&&data-original;.text-muted&&Text;a&&href;.text-muted:eq(-1)&&Text',
},
首图2:{
title:'',
host:'',
url:'/list/fyclass-fypage.html',
searchUrl:'/vodsearch/**----------fypage---.html',
searchable:0,
quickSearch:0,
headers:{
'User-Agent':'MOBILE_UA',
// "Cookie": ""
},
class_parse:'.stui-header__menu li:gt(0):lt(7);a&&Text;a&&href;/(\\d+).html',
play_parse:true,
lazy:'',
limit:10,
推荐:'ul.stui-vodlist.clearfix;li;a&&title;.lazyload&&data-original;.pic-text&&Text;a&&href',
double:true, // 推荐内容是否双层定位
一级:'.stui-vodlist li;a&&title;a&&data-original;.pic-text&&Text;a&&href',
二级:{"title":".stui-content__detail .title&&Text;.stui-content__detail p:eq(-2)&&Text","img":".stui-content__thumb .lazyload&&data-original","desc":".stui-content__detail p:eq(0)&&Text;.stui-content__detail p:eq(1)&&Text;.stui-content__detail p:eq(2)&&Text","content":".detail&&Text","tabs":".stui-vodlist__head","lists":".stui-content__playlist:eq(#id) li"},
搜索:'#searchList li;a&&title;.lazyload&&data-original;.text-muted&&Text;a&&href;.text-muted:eq(-1)&&Text',
}
......
......@@ -2,7 +2,7 @@ var rule = Object.assign(muban.mxpro,{
title:'看视界',
host:'https://www.1080kan.cc',
url:'/show/fyclass--------fypage---.html',
searchUrl:'/search/**-------------.html',
searchUrl:'/search/**----------fypage---.html',
searchable:0,
quickSearch:0,
class_parse:'.navbar-items li:gt(1):lt(6);a&&Text;a&&href;.*/(.*?).html',
......
......@@ -2,5 +2,8 @@ var rule = Object.assign(muban.mxpro,{
title:'莫扎兔',
host:'https://www.mozhatu.com',
url:'/index.php/vod/show/id/fyclass/page/fypage.html',
searchUrl:'/index.php/vod/search/page/fypage/wd/**.html',
class_parse:'.navbar-items li:gt(2):lt(8);a&&Text;a&&href;.*/(.*?).html',
searchable:1,
quickSearch:1,
});
\ No newline at end of file
......@@ -15,10 +15,11 @@ def init(db):
name = db.Column(db.String(20),unique=True)
class_name = db.Column(db.String(255))
class_url = db.Column(db.String(255))
cookie = db.Column(db.String(255))
def __repr__(self):
return "<RuleClass(name='%s', class_name='%s', class_url='%s')>" % (
self.name, self.class_name, self.class_url)
return "<RuleClass(name='%s', class_name='%s', class_url='%s',cookie='%s')>" % (
self.name, self.class_name, self.class_url,self.cookie)
# db.create_all()
db.create_all()
......
无法预览此类型文件
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File : 测试过验证.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/30
import requests
import ddddocr
from time import sleep,time
from urllib.parse import urljoin,quote,unquote
import requests.utils
# Search page requested by this script to exercise the captcha flow.
url = 'https://cokemv.me/vodsearch/斗罗大陆----------1---.html'
# Desktop Chrome user-agent string sent with every request.
PC_UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36'
# Module-level request headers; verifyCode adds a Referer entry to this dict.
headers = {'user-agent':PC_UA}
def getHome(url):
    """Return the ``scheme://host[:port]`` prefix of ``url``.

    Example: ``http://www.baidu.com:9000/323`` -> ``http://www.baidu.com:9000``.

    Raises:
        IndexError: when ``url`` contains no ``//``.
    """
    urls = url.split('//')
    authority = urls[1]
    # The host part ends at the first '/', '?' or '#'. Cutting only at '/'
    # would leak the query string for URLs such as http://a.com?x=1.
    for sep in '/?#':
        authority = authority.split(sep, 1)[0]
    return urls[0] + '//' + authority
def verifyCode(url,total_cnt=3):
    """Try to pass the search captcha of the site that serves ``url``.

    Each attempt downloads the captcha image from
    ``<home>/index.php/verify/index.html``, recognises it with ddddocr and
    submits the result to ``<home>/index.php/ajax/verify_check``, where
    ``<home>`` is ``getHome(url)``.

    Args:
        url: any URL on the target site.
        total_cnt: maximum number of attempts.

    Returns:
        The session cookies as a ``k=v;k=v`` string once the site answers
        ``msg == "ok"``, otherwise an empty string.
    """
    host = getHome(url)
    headers['Referer'] = host
    ocr = ddddocr.DdddOcr()
    for cnt in range(total_cnt):
        # A fresh session per attempt; the context manager closes its connections.
        with requests.session() as s:
            try:
                img = s.get(url=f"{host}/index.php/verify/index.html?t={time()}", headers=headers,timeout=5).content
                code = ocr.classification(img)
                print('验证结果:',code)
                res = s.post(
                    url=f"{host}/index.php/ajax/verify_check?type=search&verify={code}",
                    headers=headers,
                    timeout=5,  # without a timeout a stalled server would hang the loop
                ).json()
                if res["msg"] == "ok":
                    cookies_dict = requests.utils.dict_from_cookiejar(s.cookies)
                    return ';'.join([f'{k}={cookies_dict[k]}' for k in cookies_dict])
            except Exception:
                # Network, JSON or OCR failure: report it and move on to the next attempt.
                print(f'第{cnt+1}次验证码提交失败')
        if cnt + 1 < total_cnt:
            sleep(1)
    return ''
# Fetch the search page; if it shows the captcha prompt, run the solver and
# print the resulting cookie string.
r = requests.get(url,headers=headers,timeout=5)  # timeout so a stalled server cannot hang the script
html = r.text
print(html)
if '输入验证码' in html:
    s = verifyCode(url)
    print(s)
\ No newline at end of file
......@@ -11,6 +11,7 @@
- [X] 8.增加外网免嗅(自定义config.py里面改)
- [X] 9.增加错误处理和首页单个详情获取
- [X] 10.增加本地直播地址自定义
- [X] 11.增加数据库迁移,[新版教程](https://www.cjavapy.com/article/1977/) [旧版教程](https://www.cnblogs.com/LoveMoney-MrLi/articles/15765985.html)
###### 2022/08/29
- [X] 1.更换js引擎,速度更快性能更好
- [X] 2.新版js支持与python互动,后期可能支持js免嗅(lazy:'js:xxx')
......
js2py
pyquery
flask
requests
flask_migrate
flask-sqlalchemy
requests
netifaces
gevent ; python_version < '3.9'
supervisor ; sys_platform != 'win32' and python_version < '3.9'
func_timeout
easydict
\ No newline at end of file
easydict
ddddocr
\ No newline at end of file
......@@ -6,8 +6,11 @@
import base64
import requests
import requests.utils
from time import sleep
import os
from utils.web import UC_UA,PC_UA
import ddddocr
def getPreJs():
base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目
......@@ -27,6 +30,41 @@ def getCryptoJS():
code = f.read()
return code
def getHome(url):
    """Return the ``scheme://host[:port]`` prefix of ``url``.

    Example: ``http://www.baidu.com:9000/323`` -> ``http://www.baidu.com:9000``.

    Raises:
        IndexError: when ``url`` contains no ``//``.
    """
    urls = url.split('//')
    authority = urls[1]
    # The host part ends at the first '/', '?' or '#'. Cutting only at '/'
    # would leak the query string for URLs such as http://a.com?x=1.
    for sep in '/?#':
        authority = authority.split(sep, 1)[0]
    return urls[0] + '//' + authority
def verifyCode(url,headers,timeout=5,total_cnt=3):
    """Try to pass the search captcha of the site that serves ``url``.

    Each attempt downloads the captcha image from
    ``<home>/index.php/verify/index.html``, recognises it with ddddocr and
    submits the result to ``<home>/index.php/ajax/verify_check``, where
    ``<home>`` is ``getHome(url)``.

    Args:
        url: any URL on the target site.
        headers: request headers; a Referer pointing at the site home is
            added when none is present. The caller's dict is not modified.
        timeout: per-request timeout in seconds.
        total_cnt: maximum number of attempts.

    Returns:
        The session cookies as a ``k=v;k=v`` string once the site answers
        ``msg == "ok"``, otherwise an empty string.
    """
    host = getHome(url)
    # Work on a copy so the Referer added here does not leak into the caller's headers.
    headers = dict(headers)
    if 'referer' not in (key.lower() for key in headers):
        headers['Referer'] = host
    print(f'开始自动过验证,请求头:{headers}')
    ocr = ddddocr.DdddOcr()
    for cnt in range(total_cnt):
        # A fresh session per attempt; the context manager closes its connections.
        with requests.session() as s:
            try:
                img = s.get(url=f"{host}/index.php/verify/index.html", headers=headers,timeout=timeout).content
                code = ocr.classification(img)
                print(f'第{cnt+1}次验证码识别结果:{code}')
                res = s.post(
                    url=f"{host}/index.php/ajax/verify_check?type=search&verify={code}",
                    headers=headers,
                    timeout=timeout,  # the POST previously had no timeout and could hang
                ).json()
                if res["msg"] == "ok":
                    cookies_dict = requests.utils.dict_from_cookiejar(s.cookies)
                    return ';'.join([f'{k}={cookies_dict[k]}' for k in cookies_dict])
            except Exception:
                # Network, JSON or OCR failure: report it and move on to the next attempt.
                print(f'第{cnt+1}次验证码提交失败')
        if cnt + 1 < total_cnt:
            sleep(1)
    return ''
def base64Encode(text):
    """Return the Base64 encoding of ``text`` (encoded as UTF-8) as a str."""
    raw = text.encode("utf8")
    encoded = base64.b64encode(raw)
    return encoded.decode("utf-8")
......
......@@ -51,10 +51,12 @@ def runJs(jsPath, before='', after='', ctx=None):
js_code = fp.read()
else:
js_path = os.path.join(base_path, jsPath)
# print(js_path)
with open(js_path, 'r', encoding='UTF-8') as fp:
js_code = fp.read()
# print(js_code)
jscode_to_run = js_code
# print(jscode_to_run)
if before:
jscode_to_run = before + jscode_to_run
if after:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册