From fc56f5dca950ec0aeebd5c017dc1bc9cf3b2bc63 Mon Sep 17 00:00:00 2001 From: hjdhnx Date: Sat, 27 Aug 2022 15:15:18 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BA=86=E8=BF=90=E8=A1=8C?= =?UTF-8?q?=E6=80=A7=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app.py | 25 +++++++++++---- classes/cms.py | 77 +++++++++++++++++++++++++++++++++++++++++++---- models/rules.db | Bin 12288 -> 12288 bytes readme.md | 1 + requirements.txt | 3 +- 5 files changed, 93 insertions(+), 13 deletions(-) diff --git a/app.py b/app.py index 10c2c35..c34d71f 100644 --- a/app.py +++ b/app.py @@ -6,6 +6,10 @@ from flask_sqlalchemy import SQLAlchemy import config +import socket +from gevent.pywsgi import WSGIServer +import warnings +warnings.filterwarnings('ignore') import os from flask import Flask, jsonify, abort,request,redirect,make_response,render_template,send_from_directory @@ -19,9 +23,11 @@ import json sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach()) app = Flask(__name__,static_folder='static',static_url_path='/static') + # app.config["JSON_AS_ASCII"] = False # jsonify返回的中文正常显示 app.config.from_object(config) # 单独的配置文件里写了,这里就不用弄json中文显示了 db = SQLAlchemy(app) + rule_list = getRules() print(rule_list) @@ -68,7 +74,7 @@ def vod(): if not js_code: return jsonify(error.failed('爬虫规则加载失败')) rule = ctx.eval('rule') - cms = CMS(rule) + cms = CMS(rule,db,RuleClass) wd = getParmas('wd') ac = getParmas('ac') quick = getParmas('quick') @@ -123,9 +129,12 @@ def getRules(path='cache'): rules = {'list': rule_list, 'count': len(rule_list)} return rules -def getHost(mode=0): - ip = request.remote_addr - port = request.environ.get('SERVER_PORT') +def getHost(mode=0,port=None): + port = port or request.environ.get('SERVER_PORT') + hostname = socket.gethostname() + ip = socket.gethostbyname(hostname) + # ip = request.remote_addr + # print(ip) # mode 为0是本地,1是局域网 2是线上 if mode == 0: host = f'localhost:{port}' @@ -218,5 +227,9 @@ def database(): if __name__ == '__main__': - app.run(host="0.0.0.0", port=5705) - # app.run(debug=True, host='0.0.0.0', port=5705) \ No newline at end of file + print(f'http://{getHost(1, 5705)}/index') + # app.run(host="0.0.0.0", port=5705) + # app.run(debug=True, host='0.0.0.0', port=5705) + print('http://localhost:5705/index') + WSGIServer(('0.0.0.0', 5705), app).serve_forever() + # WSGIServer(('0.0.0.0', 5705), app,log=None).serve_forever() \ No newline at end of file diff --git a/classes/cms.py b/classes/cms.py index 6bed05c..ac6edfd 100644 --- a/classes/cms.py +++ b/classes/cms.py @@ -7,13 +7,21 @@ import requests import re import math from utils.web import * +from models import * from utils.config import config from utils.htmlParser import jsoup from urllib.parse import urljoin from concurrent.futures import ThreadPoolExecutor # 引入线程池 +import logging + +_logger = logging.getLogger(__name__) + +print(_logger) class CMS: - def __init__(self,rule): + def __init__(self,rule,db=None,RuleClass=None): + self.db = db + self.RuleClass = RuleClass host = rule.get('host','').rstrip('/') timeout = rule.get('timeout',5000) homeUrl = rule.get('homeUrl','/') @@ -113,6 +121,56 @@ class CMS: pq = jsp.pq return pdfh,pdfa,pd,pq + def getClasses(self): + if not self.db: + msg = '未提供数据库连接' + print(msg) + return [] + name = self.getName() + # self.db.metadata.clear() + # RuleClass = rule_classes.init(self.db) + res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first() + # _logger.info('xxxxxx') + if res: + cls = res.class_name.split('&') + cls2 = res.class_url.split('&') + classes = [{'type_name':cls[i],'type_id':cls2[i]} for i in range(len(cls))] + # _logger.info(classes) + return classes + else: + return [] + + def saveClass(self, classes): + if not self.db: + msg = '未提供数据库连接' + print(msg) + return msg + name = self.getName() + class_name = '&'.join([cl['type_name'] for cl in classes]) + class_url = '&'.join([cl['type_id'] for cl in classes]) + # data = RuleClass.query.filter(RuleClass.name == '555影视').all() + # self.db.metadata.clear() + # RuleClass = rule_classes.init(self.db) + res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first() + print(res) + if res: + res.class_name = class_name + res.class_url = class_url + self.db.session.add(res) + msg = f'修改成功:{res.id}' + else: + res = self.RuleClass(name=name, class_name=class_name, class_url=class_url) + self.db.session.add(res) + res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first() + msg = f'新增成功:{res.id}' + + try: + self.db.session.commit() + print(msg) + except Exception as e: + return f'发生了错误:{e}' + + def homeContent(self,fypage=1): # yanaifei # https://yanetflix.com/vodtype/dianying.html @@ -130,14 +188,21 @@ class CMS: 'type_id': class_urls[i] }) # print(self.url) + has_cache = False if self.homeUrl.startswith('http'): # print(self.homeUrl) # print(self.class_parse) try: - r = requests.get(self.homeUrl,headers=self.headers,timeout=self.timeout) + if self.class_parse: + cache_classes = self.getClasses() + if len(cache_classes) > 0: + classes = cache_classes + has_cache = True + + r = requests.get(self.homeUrl, headers=self.headers, timeout=self.timeout) r.encoding = self.encoding html = r.text - if self.class_parse: + if self.class_parse and not has_cache: p = self.class_parse.split(';') jsp = jsoup(self.url) pdfh = jsp.pdfh @@ -154,7 +219,7 @@ class CMS: 'type_name': title, 'type_id': tag }) - + self.saveClass(classes) video_result = self.homeVideoContent(html,fypage) except Exception as e: print(e) @@ -453,11 +518,11 @@ if __name__ == '__main__': rule = ctx.eval('rule') cms = CMS(rule) print(cms.title) - # print(cms.homeContent()) + print(cms.homeContent()) # print(cms.categoryContent('5',1)) # print(cms.categoryContent('latest',1)) # print(cms.detailContent(['https://www.2345ka.com/v/45499.html'])) # print(cms.detailContent(1,['https://cokemv.me/voddetail/40573.html'])) # cms.categoryContent('dianying',1) # print(cms.detailContent(['67391'])) - print(cms.searchContent('斗罗大陆')) \ No newline at end of file + # print(cms.searchContent('斗罗大陆')) \ No newline at end of file diff --git a/models/rules.db b/models/rules.db index 70d328beb14de5b7fbcf79a8588aec8134299893..f126d74e8c769d6c173999dd531e00dc1aa775ef 100644 GIT binary patch delta 749 zcmchV%}X0W7{+(qh*%S*9;Jl{;(JeZ3sT#H?V$$`p4w|l3{hOO_(8$b#xCkcblutz zijAa!;5N3FZc{_Lt{*oq{tLbBJF_|WA8^`ipy0WMcbIv2UVigF%;b`}q;*H^dvo`R z*ykjxBpQiL3gm)l4)eFAdx_eb)v0uFPs~$N6^V zH%q9UI(~Bx{2vzGJqj&qSiBvp-|2pZJ6o;hLFnLG0go(PGgzSvbbkje8_hBr?_qv6 zJKJfh+JRH)2rq}{dh4?n>Zzrb)Qmc%E@pw%i+HZ-&lNSJyj)H%EyGM0cluwYy%|=6 z|C#nr*a@jy7)TFc@tBv!3deZr0N(<89rHG@6EDcSfSVtw_X%3%I)58r2Rmrg5^xVj zU=;3y{J4ulQt^~Jm!5w`H;cUQCaH}MNo}so5ebeMc_elV@fSmHFXIoR5ppGvUnC$u o$yL37&?BR_rD5^!{}{O?4T$ZDrQgT)y)b#1zjW3qB0RU2%6nOvu diff --git a/readme.md b/readme.md index bf4b664..839a38d 100644 --- a/readme.md +++ b/readme.md @@ -8,6 +8,7 @@ - [X] 5.增加 flask-sqlalchemy 用于驱动sqlite3数据库 - [ ] 6.引入sqlite3数据进行缓存分类定位到的标签 - [ ] 7.增加filter一键爬取和入库(filter_name,filter_url,filter_parse) +- [X] 8.使用gevent作为服务,提升大量性能 ###### 2022/08/26 - [X] 1.支持首页推荐功能,模板属性增加limit参数 - [X] 2.支持纯一级的功能(比如车车网没二级) diff --git a/requirements.txt b/requirements.txt index d9b0513..402dba2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ PyExecJS pyquery flask requests -flask-sqlalchemy \ No newline at end of file +flask-sqlalchemy +gevent \ No newline at end of file -- GitLab