提交 312d6215 编写于 作者: H hjdhnx

增加了蓝莓影视筛选示例

上级 ec281cb0
......@@ -621,6 +621,22 @@ class CMS:
t1 = time()
pg = str(fypage)
url = self.url.replace('fyclass',fyclass)
if self.filter_url:
if not 'fyfilter' in url: # 第一种情况,默认不写fyfilter关键字,视为直接拼接在链接后面当参数
if not url.endswith('&') and not self.filter_url.startswith('&'):
url += '&'
url += self.filter_url
else: # 第二种情况直接替换关键字为待拼接的结果后面渲染,适用于 ----fypage.html的情况
url = url.replace('fyfilter', self.filter_url)
url = render_template_string(url,fl=fl)
# fl_url = render_template_string(self.filter_url,fl=fl)
# if not 'fyfilter' in url: # 第一种情况,默认不写fyfilter关键字,视为直接拼接在链接后面当参数
# if not url.endswith('&') and not fl_url.startswith('&'):
# url += '&'
# url += fl_url
# else: # 第二种情况直接替换关键字为渲染后的结果,适用于 ----fypage.html的情况
# url = url.replace('fyfilter',fl_url)
if url.find('fypage') > -1:
if '(' in url and ')' in url:
# url_rep = url[url.find('('):url.find(')')+1]
......@@ -639,15 +655,7 @@ class CMS:
url = url.replace('fypage',pg)
if fypage == 1 and self.test('[\[\]]',url):
url = url.split('[')[1].split(']')[0]
if self.filter_url:
fl_url = render_template_string(self.filter_url,fl=fl)
if not 'fyfilter' in url: # 第一种情况,默认不写fyfilter关键字,视为直接拼接在链接后面当参数
if not url.endswith('&') and not fl_url.startswith('&'):
url += '&'
url += fl_url
else: # 第二种情况直接替换关键字为渲染后的结果,适用于 ----fypage.html的情况
url = url.replace('fyfilter',fl_url)
print(url)
# print(url)
p = self.一级
jsp = jsoup(self.url)
videos = []
......
此差异已折叠。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File : mxpro筛选.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/9/13
import re
import requests
from utils.htmlParser import jsoup
headers = {'user-agent':'Mozilla/5.0 (Linux; Android 11; M2007J3SC Build/RKQ1.200826.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.120 MQQBrowser/6.2 TBS/045714 Mobile Safari/537.36'}
def getFilters(url, timeout=10):
    """Scrape the filter groups shown on one category listing page.

    The category id is taken from the last ``/<digits>`` segment of *url*.
    The page is downloaded with ``requests`` and each node matched by
    ``.module-class&&.module-class-items`` becomes one filter group.

    Args:
        url: Address of the category listing page; it must contain a
            ``/<digits>`` path segment.
        timeout: Seconds handed to ``requests.get`` so a stalled server
            cannot block the call forever.

    Returns:
        A dict with a single entry ``{cate_id: [group, ...]}``. Every group
        is ``{'key': title, 'name': title, 'value': [{'n': ..., 'v': ...}]}``
        where ``n`` is the link text and ``v`` is the part of the link's
        href after the first ``-`` with ``.html`` and all ``-`` removed.

    Raises:
        AttributeError: If *url* has no ``/<digits>`` segment, or a filter
            link's href contains no ``-`` (the regex search returns None).
        requests.RequestException: If the download fails or times out.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    cate_id = re.search(r'.*/(\d+)', url).group(1)
    print(cate_id)
    jsp = jsoup(url)
    pdfh = jsp.pdfh
    pdfa = jsp.pdfa
    print(jsp)
    r = requests.get(url, headers=headers, timeout=timeout)
    # Let requests guess the charset from the body instead of the headers.
    r.encoding = r.apparent_encoding
    html = r.text
    # NOTE(review): pdfa presumably returns the list of matching nodes and
    # pdfh a single extracted string -- confirm against utils.htmlParser.
    cls_list = pdfa(html, '.module-class&&.module-class-items')
    print(len(cls_list))
    # Compiled once here because the pattern is the same for every link.
    href_pattern = re.compile(r'(.*?)-(.*)', re.M | re.I | re.S)
    ft_dict = {cate_id: []}
    for cls in cls_list:
        tt = pdfh(cls, '.module-item-title&&Text')
        values = pdfa(cls, '.module-item-box&&a')
        vl = [
            {
                "n": pdfh(i, 'a&&Text'),
                # Keep what follows the first '-', then strip the suffix
                # and the remaining dash placeholders.
                "v": href_pattern.search(pdfh(i, 'a&&href'))
                .group(2).replace('.html', '').replace('-', ''),
            }
            for i in values
        ]
        ft_dict[cate_id].append({
            'key': tt,
            'name': tt,
            'value': vl
        })
    print(ft_dict)
    return ft_dict
if __name__ == '__main__':
    # Gather the filters of every listed category into one dict and print it.
    new_dict = {}
    category_ids = '20&1&2&3&4&23'.split('&')
    for i in category_ids:
        page_url = f'https://lanmeiguojiang.com/show/{i}-----------.html'
        # Each call returns a dict keyed by its own category id.
        new_dict.update(getFilters(page_url))
    print(new_dict)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册