diff --git a/app.py b/app.py index 10c2c358610bf099d046bded417c0e783f173a57..c34d71f71228df2d0bcc42acb516fae9b1dbf2a7 100644 --- a/app.py +++ b/app.py @@ -6,6 +6,10 @@ from flask_sqlalchemy import SQLAlchemy import config +import socket +from gevent.pywsgi import WSGIServer +import warnings +warnings.filterwarnings('ignore') import os from flask import Flask, jsonify, abort,request,redirect,make_response,render_template,send_from_directory @@ -19,9 +23,11 @@ import json sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach()) app = Flask(__name__,static_folder='static',static_url_path='/static') + # app.config["JSON_AS_ASCII"] = False # jsonify返回的中文正常显示 app.config.from_object(config) # 单独的配置文件里写了,这里就不用弄json中文显示了 db = SQLAlchemy(app) + rule_list = getRules() print(rule_list) @@ -68,7 +74,7 @@ def vod(): if not js_code: return jsonify(error.failed('爬虫规则加载失败')) rule = ctx.eval('rule') - cms = CMS(rule) + cms = CMS(rule,db,RuleClass) wd = getParmas('wd') ac = getParmas('ac') quick = getParmas('quick') @@ -123,9 +129,12 @@ def getRules(path='cache'): rules = {'list': rule_list, 'count': len(rule_list)} return rules -def getHost(mode=0): - ip = request.remote_addr - port = request.environ.get('SERVER_PORT') +def getHost(mode=0,port=None): + port = port or request.environ.get('SERVER_PORT') + hostname = socket.gethostname() + ip = socket.gethostbyname(hostname) + # ip = request.remote_addr + # print(ip) # mode 为0是本地,1是局域网 2是线上 if mode == 0: host = f'localhost:{port}' @@ -218,5 +227,9 @@ def database(): if __name__ == '__main__': - app.run(host="0.0.0.0", port=5705) - # app.run(debug=True, host='0.0.0.0', port=5705) \ No newline at end of file + print(f'http://{getHost(1, 5705)}/index') + # app.run(host="0.0.0.0", port=5705) + # app.run(debug=True, host='0.0.0.0', port=5705) + print('http://localhost:5705/index') + WSGIServer(('0.0.0.0', 5705), app).serve_forever() + # WSGIServer(('0.0.0.0', 5705), app,log=None).serve_forever() \ No newline at end of file diff --git a/classes/cms.py b/classes/cms.py index 6bed05cde64dd10e4b63752474b14587e881894f..ac6edfd8322c681794fdf63a7ab5bd14696bf146 100644 --- a/classes/cms.py +++ b/classes/cms.py @@ -7,13 +7,21 @@ import requests import re import math from utils.web import * +from models import * from utils.config import config from utils.htmlParser import jsoup from urllib.parse import urljoin from concurrent.futures import ThreadPoolExecutor # 引入线程池 +import logging + +_logger = logging.getLogger(__name__) + +print(_logger) class CMS: - def __init__(self,rule): + def __init__(self,rule,db=None,RuleClass=None): + self.db = db + self.RuleClass = RuleClass host = rule.get('host','').rstrip('/') timeout = rule.get('timeout',5000) homeUrl = rule.get('homeUrl','/') @@ -113,6 +121,56 @@ class CMS: pq = jsp.pq return pdfh,pdfa,pd,pq + def getClasses(self): + if not self.db: + msg = '未提供数据库连接' + print(msg) + return [] + name = self.getName() + # self.db.metadata.clear() + # RuleClass = rule_classes.init(self.db) + res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first() + # _logger.info('xxxxxx') + if res: + cls = res.class_name.split('&') + cls2 = res.class_url.split('&') + classes = [{'type_name':cls[i],'type_id':cls2[i]} for i in range(len(cls))] + # _logger.info(classes) + return classes + else: + return [] + + def saveClass(self, classes): + if not self.db: + msg = '未提供数据库连接' + print(msg) + return msg + name = self.getName() + class_name = '&'.join([cl['type_name'] for cl in classes]) + class_url = '&'.join([cl['type_id'] for cl in classes]) + # data = RuleClass.query.filter(RuleClass.name == '555影视').all() + # self.db.metadata.clear() + # RuleClass = rule_classes.init(self.db) + res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first() + print(res) + if res: + res.class_name = class_name + res.class_url = class_url + self.db.session.add(res) + msg = f'修改成功:{res.id}' + else: + res = self.RuleClass(name=name, class_name=class_name, class_url=class_url) + self.db.session.add(res) + res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first() + msg = f'新增成功:{res.id}' + + try: + self.db.session.commit() + print(msg) + except Exception as e: + return f'发生了错误:{e}' + + def homeContent(self,fypage=1): # yanaifei # https://yanetflix.com/vodtype/dianying.html @@ -130,14 +188,21 @@ class CMS: 'type_id': class_urls[i] }) # print(self.url) + has_cache = False if self.homeUrl.startswith('http'): # print(self.homeUrl) # print(self.class_parse) try: - r = requests.get(self.homeUrl,headers=self.headers,timeout=self.timeout) + if self.class_parse: + cache_classes = self.getClasses() + if len(cache_classes) > 0: + classes = cache_classes + has_cache = True + + r = requests.get(self.homeUrl, headers=self.headers, timeout=self.timeout) r.encoding = self.encoding html = r.text - if self.class_parse: + if self.class_parse and not has_cache: p = self.class_parse.split(';') jsp = jsoup(self.url) pdfh = jsp.pdfh @@ -154,7 +219,7 @@ class CMS: 'type_name': title, 'type_id': tag }) - + self.saveClass(classes) video_result = self.homeVideoContent(html,fypage) except Exception as e: print(e) @@ -453,11 +518,11 @@ if __name__ == '__main__': rule = ctx.eval('rule') cms = CMS(rule) print(cms.title) - # print(cms.homeContent()) + print(cms.homeContent()) # print(cms.categoryContent('5',1)) # print(cms.categoryContent('latest',1)) # print(cms.detailContent(['https://www.2345ka.com/v/45499.html'])) # print(cms.detailContent(1,['https://cokemv.me/voddetail/40573.html'])) # cms.categoryContent('dianying',1) # print(cms.detailContent(['67391'])) - print(cms.searchContent('斗罗大陆')) \ No newline at end of file + # print(cms.searchContent('斗罗大陆')) \ No newline at end of file diff --git a/models/rules.db b/models/rules.db index 70d328beb14de5b7fbcf79a8588aec8134299893..f126d74e8c769d6c173999dd531e00dc1aa775ef 100644 Binary files a/models/rules.db and b/models/rules.db differ diff --git a/readme.md b/readme.md index bf4b664f8f73681f307898191fbe21bcd40e79f4..839a38d357c18f7a82c32af49d293f19bc98595a 100644 --- a/readme.md +++ b/readme.md @@ -8,6 +8,7 @@ - [X] 5.增加 flask-sqlalchemy 用于驱动sqlite3数据库 - [ ] 6.引入sqlite3数据进行缓存分类定位到的标签 - [ ] 7.增加filter一键爬取和入库(filter_name,filter_url,filter_parse) +- [X] 8.使用gevent作为服务,提升大量性能 ###### 2022/08/26 - [X] 1.支持首页推荐功能,模板属性增加limit参数 - [X] 2.支持纯一级的功能(比如车车网没二级) diff --git a/requirements.txt b/requirements.txt index d9b0513b6607f8b5b585b7b124f031708da04ce8..402dba2db1eb5326b244653cc60a9e3b2a388f57 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ PyExecJS pyquery flask requests -flask-sqlalchemy \ No newline at end of file +flask-sqlalchemy +gevent \ No newline at end of file