提交 fc56f5dc 编写于 作者: H hjdhnx

优化了运行性能

上级 2c198c3c
......@@ -6,6 +6,10 @@
from flask_sqlalchemy import SQLAlchemy
import config
import socket
from gevent.pywsgi import WSGIServer
import warnings
warnings.filterwarnings('ignore')
import os
from flask import Flask, jsonify, abort,request,redirect,make_response,render_template,send_from_directory
......@@ -19,9 +23,11 @@ import json
sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
app = Flask(__name__,static_folder='static',static_url_path='/static')
# app.config["JSON_AS_ASCII"] = False # jsonify返回的中文正常显示
app.config.from_object(config) # 单独的配置文件里写了,这里就不用弄json中文显示了
db = SQLAlchemy(app)
rule_list = getRules()
print(rule_list)
......@@ -68,7 +74,7 @@ def vod():
if not js_code:
return jsonify(error.failed('爬虫规则加载失败'))
rule = ctx.eval('rule')
cms = CMS(rule)
cms = CMS(rule,db,RuleClass)
wd = getParmas('wd')
ac = getParmas('ac')
quick = getParmas('quick')
......@@ -123,9 +129,12 @@ def getRules(path='cache'):
rules = {'list': rule_list, 'count': len(rule_list)}
return rules
def getHost(mode=0):
ip = request.remote_addr
port = request.environ.get('SERVER_PORT')
def getHost(mode=0,port=None):
port = port or request.environ.get('SERVER_PORT')
hostname = socket.gethostname()
ip = socket.gethostbyname(hostname)
# ip = request.remote_addr
# print(ip)
# mode 为0是本地,1是局域网 2是线上
if mode == 0:
host = f'localhost:{port}'
......@@ -218,5 +227,9 @@ def database():
if __name__ == '__main__':
app.run(host="0.0.0.0", port=5705)
# app.run(debug=True, host='0.0.0.0', port=5705)
\ No newline at end of file
print(f'http://{getHost(1, 5705)}/index')
# app.run(host="0.0.0.0", port=5705)
# app.run(debug=True, host='0.0.0.0', port=5705)
print('http://localhost:5705/index')
WSGIServer(('0.0.0.0', 5705), app).serve_forever()
# WSGIServer(('0.0.0.0', 5705), app,log=None).serve_forever()
\ No newline at end of file
......@@ -7,13 +7,21 @@ import requests
import re
import math
from utils.web import *
from models import *
from utils.config import config
from utils.htmlParser import jsoup
from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor # 引入线程池
import logging
_logger = logging.getLogger(__name__)
print(_logger)
class CMS:
def __init__(self,rule):
def __init__(self,rule,db=None,RuleClass=None):
self.db = db
self.RuleClass = RuleClass
host = rule.get('host','').rstrip('/')
timeout = rule.get('timeout',5000)
homeUrl = rule.get('homeUrl','/')
......@@ -113,6 +121,56 @@ class CMS:
pq = jsp.pq
return pdfh,pdfa,pd,pq
def getClasses(self):
    """Return the cached category list for this rule from the database.

    Looks up the ``RuleClass`` row whose ``name`` equals ``self.getName()``
    and rebuilds the categories from its ``&``-joined ``class_name`` and
    ``class_url`` columns.

    Returns:
        A list of ``{'type_name': ..., 'type_id': ...}`` dicts. An empty
        list is returned when no database/model was supplied, when no row
        exists for this rule, or when the row stores no category names.
    """
    if not self.db or self.RuleClass is None:
        msg = '未提供数据库连接'
        print(msg)
        return []
    name = self.getName()
    res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
    # An empty/NULL class_name must count as "no cache"; otherwise the caller
    # would receive a single blank category and skip parsing the real ones.
    if not res or not res.class_name:
        return []
    names = res.class_name.split('&')
    urls = (res.class_url or '').split('&')
    # zip() pairs names with urls and tolerates a length mismatch instead of
    # raising IndexError on a partially written row.
    return [{'type_name': type_name, 'type_id': type_id} for type_name, type_id in zip(names, urls)]
def saveClass(self, classes):
    """Persist the parsed category list for this rule.

    The categories are stored on the ``RuleClass`` row whose ``name`` equals
    ``self.getName()``: names and ids are each joined with ``&`` into the
    ``class_name`` and ``class_url`` columns. An existing row is updated,
    otherwise a new one is inserted.

    Args:
        classes: Iterable of dicts that each carry ``type_name`` and
            ``type_id``.

    Returns:
        A status message: the "no database" notice, the update/insert
        success message (including the row id), or the error text when the
        flush/commit fails.
    """
    if not self.db or self.RuleClass is None:
        msg = '未提供数据库连接'
        print(msg)
        return msg
    name = self.getName()
    # str() keeps the join from failing on non-string ids (e.g. integers).
    class_name = '&'.join(str(cl['type_name']) for cl in classes)
    class_url = '&'.join(str(cl['type_id']) for cl in classes)
    session = self.db.session
    res = session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
    is_update = bool(res)
    if is_update:
        res.class_name = class_name
        res.class_url = class_url
    else:
        res = self.RuleClass(name=name, class_name=class_name, class_url=class_url)
    session.add(res)
    try:
        # Flush explicitly so a new row has its primary key assigned before
        # the message is built, instead of relying on autoflush via a re-query.
        session.flush()
        msg = f'修改成功:{res.id}' if is_update else f'新增成功:{res.id}'
        session.commit()
    except Exception as e:
        # Roll back so the shared session stays usable for later requests.
        session.rollback()
        return f'发生了错误:{e}'
    print(msg)
    return msg
def homeContent(self,fypage=1):
# yanaifei
# https://yanetflix.com/vodtype/dianying.html
......@@ -130,14 +188,21 @@ class CMS:
'type_id': class_urls[i]
})
# print(self.url)
has_cache = False
if self.homeUrl.startswith('http'):
# print(self.homeUrl)
# print(self.class_parse)
try:
r = requests.get(self.homeUrl,headers=self.headers,timeout=self.timeout)
if self.class_parse:
cache_classes = self.getClasses()
if len(cache_classes) > 0:
classes = cache_classes
has_cache = True
r = requests.get(self.homeUrl, headers=self.headers, timeout=self.timeout)
r.encoding = self.encoding
html = r.text
if self.class_parse:
if self.class_parse and not has_cache:
p = self.class_parse.split(';')
jsp = jsoup(self.url)
pdfh = jsp.pdfh
......@@ -154,7 +219,7 @@ class CMS:
'type_name': title,
'type_id': tag
})
self.saveClass(classes)
video_result = self.homeVideoContent(html,fypage)
except Exception as e:
print(e)
......@@ -453,11 +518,11 @@ if __name__ == '__main__':
rule = ctx.eval('rule')
cms = CMS(rule)
print(cms.title)
# print(cms.homeContent())
print(cms.homeContent())
# print(cms.categoryContent('5',1))
# print(cms.categoryContent('latest',1))
# print(cms.detailContent(['https://www.2345ka.com/v/45499.html']))
# print(cms.detailContent(1,['https://cokemv.me/voddetail/40573.html']))
# cms.categoryContent('dianying',1)
# print(cms.detailContent(['67391']))
print(cms.searchContent('斗罗大陆'))
\ No newline at end of file
# print(cms.searchContent('斗罗大陆'))
\ No newline at end of file
无法预览此类型文件
......@@ -8,6 +8,7 @@
- [X] 5.增加 flask-sqlalchemy 用于驱动sqlite3数据库
- [ ] 6.引入sqlite3数据库,缓存分类定位到的标签
- [ ] 7.增加filter一键爬取和入库(filter_name,filter_url,filter_parse)
- [X] 8.使用gevent作为服务,大幅提升性能
###### 2022/08/26
- [X] 1.支持首页推荐功能,模板属性增加limit参数
- [X] 2.支持纯一级的功能(比如车车网没二级)
......
......@@ -2,4 +2,5 @@ PyExecJS
pyquery
flask
requests
flask-sqlalchemy
\ No newline at end of file
flask-sqlalchemy
gevent
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册