提交 fc56f5dc 编写于 作者: H hjdhnx

优化了运行性能

上级 2c198c3c
...@@ -6,6 +6,10 @@ ...@@ -6,6 +6,10 @@
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
import config import config
import socket
from gevent.pywsgi import WSGIServer
import warnings
warnings.filterwarnings('ignore')
import os import os
from flask import Flask, jsonify, abort,request,redirect,make_response,render_template,send_from_directory from flask import Flask, jsonify, abort,request,redirect,make_response,render_template,send_from_directory
...@@ -19,9 +23,11 @@ import json ...@@ -19,9 +23,11 @@ import json
sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach()) sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
app = Flask(__name__,static_folder='static',static_url_path='/static') app = Flask(__name__,static_folder='static',static_url_path='/static')
# app.config["JSON_AS_ASCII"] = False # jsonify返回的中文正常显示 # app.config["JSON_AS_ASCII"] = False # jsonify返回的中文正常显示
app.config.from_object(config) # 单独的配置文件里写了,这里就不用弄json中文显示了 app.config.from_object(config) # 单独的配置文件里写了,这里就不用弄json中文显示了
db = SQLAlchemy(app) db = SQLAlchemy(app)
rule_list = getRules() rule_list = getRules()
print(rule_list) print(rule_list)
...@@ -68,7 +74,7 @@ def vod(): ...@@ -68,7 +74,7 @@ def vod():
if not js_code: if not js_code:
return jsonify(error.failed('爬虫规则加载失败')) return jsonify(error.failed('爬虫规则加载失败'))
rule = ctx.eval('rule') rule = ctx.eval('rule')
cms = CMS(rule) cms = CMS(rule,db,RuleClass)
wd = getParmas('wd') wd = getParmas('wd')
ac = getParmas('ac') ac = getParmas('ac')
quick = getParmas('quick') quick = getParmas('quick')
...@@ -123,9 +129,12 @@ def getRules(path='cache'): ...@@ -123,9 +129,12 @@ def getRules(path='cache'):
rules = {'list': rule_list, 'count': len(rule_list)} rules = {'list': rule_list, 'count': len(rule_list)}
return rules return rules
def getHost(mode=0): def getHost(mode=0,port=None):
ip = request.remote_addr port = port or request.environ.get('SERVER_PORT')
port = request.environ.get('SERVER_PORT') hostname = socket.gethostname()
ip = socket.gethostbyname(hostname)
# ip = request.remote_addr
# print(ip)
# mode 为0是本地,1是局域网 2是线上 # mode 为0是本地,1是局域网 2是线上
if mode == 0: if mode == 0:
host = f'localhost:{port}' host = f'localhost:{port}'
...@@ -218,5 +227,9 @@ def database(): ...@@ -218,5 +227,9 @@ def database():
if __name__ == '__main__': if __name__ == '__main__':
app.run(host="0.0.0.0", port=5705) print(f'http://{getHost(1, 5705)}/index')
# app.run(debug=True, host='0.0.0.0', port=5705) # app.run(host="0.0.0.0", port=5705)
\ No newline at end of file # app.run(debug=True, host='0.0.0.0', port=5705)
print('http://localhost:5705/index')
WSGIServer(('0.0.0.0', 5705), app).serve_forever()
# WSGIServer(('0.0.0.0', 5705), app,log=None).serve_forever()
\ No newline at end of file
...@@ -7,13 +7,21 @@ import requests ...@@ -7,13 +7,21 @@ import requests
import re import re
import math import math
from utils.web import * from utils.web import *
from models import *
from utils.config import config from utils.config import config
from utils.htmlParser import jsoup from utils.htmlParser import jsoup
from urllib.parse import urljoin from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor # 引入线程池 from concurrent.futures import ThreadPoolExecutor # 引入线程池
import logging
_logger = logging.getLogger(__name__)
print(_logger)
class CMS: class CMS:
def __init__(self,rule): def __init__(self,rule,db=None,RuleClass=None):
self.db = db
self.RuleClass = RuleClass
host = rule.get('host','').rstrip('/') host = rule.get('host','').rstrip('/')
timeout = rule.get('timeout',5000) timeout = rule.get('timeout',5000)
homeUrl = rule.get('homeUrl','/') homeUrl = rule.get('homeUrl','/')
...@@ -113,6 +121,56 @@ class CMS: ...@@ -113,6 +121,56 @@ class CMS:
pq = jsp.pq pq = jsp.pq
return pdfh,pdfa,pd,pq return pdfh,pdfa,pd,pq
def getClasses(self):
    """Return the category list cached in the database for this rule.

    Looks up the ``RuleClass`` row whose ``name`` equals ``self.getName()``
    and rebuilds the categories from its '&'-joined ``class_name`` and
    ``class_url`` columns.

    Returns:
        A list of ``{'type_name': ..., 'type_id': ...}`` dicts. The list is
        empty when no database/model was supplied, when no row exists for
        this rule, or when the row holds no category data.
    """
    if not self.db or self.RuleClass is None:
        msg = '未提供数据库连接'
        print(msg)
        return []
    name = self.getName()
    res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
    # A missing row and blank/NULL columns both mean "nothing cached";
    # splitting an empty string would otherwise produce one bogus empty category.
    if not res or not res.class_name or not res.class_url:
        return []
    names = res.class_name.split('&')
    urls = res.class_url.split('&')
    # zip() stops at the shorter side, so a row whose two columns got out of
    # sync yields the matching prefix instead of raising IndexError.
    return [{'type_name': n, 'type_id': u} for n, u in zip(names, urls)]
def saveClass(self, classes):
    """Insert or update the cached category list for this rule.

    The categories are stored on the ``RuleClass`` row whose ``name`` equals
    ``self.getName()``, as two '&'-joined strings (``class_name`` built from
    each item's ``type_name``, ``class_url`` from each item's ``type_id``).

    Args:
        classes: Iterable of dicts with string ``type_name`` and ``type_id``
            values.

    Returns:
        A status message: the "modified"/"added" message including the row
        id on success, the "no database" message when no database/model was
        supplied, or an error message if the database operation failed.
    """
    if not self.db or self.RuleClass is None:
        msg = '未提供数据库连接'
        print(msg)
        return msg
    name = self.getName()
    class_name = '&'.join([cl['type_name'] for cl in classes])
    class_url = '&'.join([cl['type_id'] for cl in classes])
    session = self.db.session
    try:
        res = session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
        if res:
            res.class_name = class_name
            res.class_url = class_url
            session.add(res)
            msg = f'修改成功:{res.id}'
        else:
            res = self.RuleClass(name=name, class_name=class_name, class_url=class_url)
            session.add(res)
            # Flush so the new row gets its primary key without having to
            # re-query (which only worked when autoflush was enabled).
            session.flush()
            msg = f'新增成功:{res.id}'
        session.commit()
    except Exception as e:
        # A failed flush/commit leaves the session in an invalid state until
        # it is rolled back; without this every later query would also fail.
        session.rollback()
        return f'发生了错误:{e}'
    print(msg)
    return msg
def homeContent(self,fypage=1): def homeContent(self,fypage=1):
# yanaifei # yanaifei
# https://yanetflix.com/vodtype/dianying.html # https://yanetflix.com/vodtype/dianying.html
...@@ -130,14 +188,21 @@ class CMS: ...@@ -130,14 +188,21 @@ class CMS:
'type_id': class_urls[i] 'type_id': class_urls[i]
}) })
# print(self.url) # print(self.url)
has_cache = False
if self.homeUrl.startswith('http'): if self.homeUrl.startswith('http'):
# print(self.homeUrl) # print(self.homeUrl)
# print(self.class_parse) # print(self.class_parse)
try: try:
r = requests.get(self.homeUrl,headers=self.headers,timeout=self.timeout) if self.class_parse:
cache_classes = self.getClasses()
if len(cache_classes) > 0:
classes = cache_classes
has_cache = True
r = requests.get(self.homeUrl, headers=self.headers, timeout=self.timeout)
r.encoding = self.encoding r.encoding = self.encoding
html = r.text html = r.text
if self.class_parse: if self.class_parse and not has_cache:
p = self.class_parse.split(';') p = self.class_parse.split(';')
jsp = jsoup(self.url) jsp = jsoup(self.url)
pdfh = jsp.pdfh pdfh = jsp.pdfh
...@@ -154,7 +219,7 @@ class CMS: ...@@ -154,7 +219,7 @@ class CMS:
'type_name': title, 'type_name': title,
'type_id': tag 'type_id': tag
}) })
self.saveClass(classes)
video_result = self.homeVideoContent(html,fypage) video_result = self.homeVideoContent(html,fypage)
except Exception as e: except Exception as e:
print(e) print(e)
...@@ -453,11 +518,11 @@ if __name__ == '__main__': ...@@ -453,11 +518,11 @@ if __name__ == '__main__':
rule = ctx.eval('rule') rule = ctx.eval('rule')
cms = CMS(rule) cms = CMS(rule)
print(cms.title) print(cms.title)
# print(cms.homeContent()) print(cms.homeContent())
# print(cms.categoryContent('5',1)) # print(cms.categoryContent('5',1))
# print(cms.categoryContent('latest',1)) # print(cms.categoryContent('latest',1))
# print(cms.detailContent(['https://www.2345ka.com/v/45499.html'])) # print(cms.detailContent(['https://www.2345ka.com/v/45499.html']))
# print(cms.detailContent(1,['https://cokemv.me/voddetail/40573.html'])) # print(cms.detailContent(1,['https://cokemv.me/voddetail/40573.html']))
# cms.categoryContent('dianying',1) # cms.categoryContent('dianying',1)
# print(cms.detailContent(['67391'])) # print(cms.detailContent(['67391']))
print(cms.searchContent('斗罗大陆')) # print(cms.searchContent('斗罗大陆'))
\ No newline at end of file \ No newline at end of file
无法预览此类型文件
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
- [X] 5.增加 flask-sqlalchemy 用于驱动sqlite3数据库 - [X] 5.增加 flask-sqlalchemy 用于驱动sqlite3数据库
- [ ] 6.引入sqlite3数据进行缓存分类定位到的标签 - [ ] 6.引入sqlite3数据进行缓存分类定位到的标签
- [ ] 7.增加filter一键爬取和入库(filter_name,filter_url,filter_parse) - [ ] 7.增加filter一键爬取和入库(filter_name,filter_url,filter_parse)
- [X] 8.使用gevent作为WSGI服务器,大幅提升性能
###### 2022/08/26 ###### 2022/08/26
- [X] 1.支持首页推荐功能,模板属性增加limit参数 - [X] 1.支持首页推荐功能,模板属性增加limit参数
- [X] 2.支持纯一级的功能(比如车车网没二级) - [X] 2.支持纯一级的功能(比如车车网没二级)
......
...@@ -2,4 +2,5 @@ PyExecJS ...@@ -2,4 +2,5 @@ PyExecJS
pyquery pyquery
flask flask
requests requests
flask-sqlalchemy flask-sqlalchemy
\ No newline at end of file gevent
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册