提交 9358f192 编写于 作者: H hjdhnx

完成pdfh系列函数

上级 a2913258
// Scraping rule for the 鸭奈飞 (yanetflix.com) source.
// Each key appears exactly once: a repeated key in an object literal is
// silently overridden by its last occurrence, which hides the real value.
var rule = {
    // Display name of the source.
    title:'鸭奈飞',
    // Category listing URL template. `fyclass` is replaced with the category
    // id and `fypage` with the page number before the request is made.
    url:'https://yanetflix.com/vodshow/fyclass--------fypage---.html',
    // Search URL template (`**` presumably marks the keyword slot - confirm
    // against the consumer of this rule).
    searchUrl:'/vodsearch/**----------fypage---.html',
    // User-agent selector understood by the rule consumer.
    ua:'MOBILE_UA',
    // '&'-separated category names and their ids, paired by position.
    class_name:'电影&连续剧&综艺&动漫',
    class_url:'dianying&lianxuju&zongyi&dongman',
    // ';'-separated parse rules, in order:
    // item list selector; title; image; remarks; link.
    一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
    // Detail-page and search parse rules are not defined yet.
    二级:'',
    搜索:'',
}
\ No newline at end of file
......@@ -3,11 +3,15 @@
# File : cms.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/25
import requests
from utils.web import *
from utils.config import config
from utils.htmlParser import jsoup
class CMS:
def __init__(self,rule):
self.url = rule.get('url','')
self.url = rule.get('url','').rstrip('/')
self.searchUrl = rule.get('searchUrl','')
ua = rule.get('ua','')
if ua == 'MOBILE_UA':
......@@ -23,14 +27,111 @@ class CMS:
self.二级 = rule.get('二级','')
self.搜索 = rule.get('搜索','')
self.title = rule.get('title','')
self.filter = rule.get('filter',[])
self.extend = rule.get('extend',[])
def getName(self):
    """Return the title stored on this instance."""
    title = self.title
    return title
def homeContent(self):
    """Build the home payload: the category list plus optional filters.

    ``self.class_name`` and ``self.class_url`` are '&'-separated strings
    that are paired by position, e.g. the category id ``dianying`` maps to
    https://yanetflix.com/vodtype/dianying.html on the yanaifei site.

    :return: dict with a ``class`` list of ``type_name``/``type_id`` pairs
        and, when ``self.filter`` is truthy, a ``filters`` entry taken
        from the module-level ``config``
    """
    names = self.class_name.split('&')
    ids = self.class_url.split('&')
    # zip() stops at the shorter list, so unmatched trailing entries are dropped.
    payload = {
        'class': [
            {'type_name': name, 'type_id': type_id}
            for name, type_id in zip(names, ids)
        ]
    }
    if self.filter:
        payload['filters'] = config['filter']
    return payload
def homeVideoContent(self):
    """Fetch the home page and return its videos as ``{'list': [...]}``.

    NOTE(review): this relies on ``self.fetch``, ``self.html``,
    ``self.regStr`` and ``self.header``, none of which are defined in the
    visible part of the class, and the URL is hard-coded instead of being
    derived from the rule - confirm this method is still meant to be used.

    :return: dict whose ``list`` holds one entry per matched item with the
        keys ``vod_id``, ``vod_name``, ``vod_pic`` and ``vod_remarks``
    :raises IndexError: when a matched item lacks one of the expected nodes
    """
    response = self.fetch("https://www.genmov.com/", headers=self.header)
    tree = self.html(response.text)
    cards = tree.xpath("//div[@class='module module-wrapper']//div[@class='module-item']")
    videos = []
    for card in cards:
        # Each xpath() call returns a list; only the first hit is used.
        name = card.xpath(".//div[@class='module-item-pic']/a/@title")[0]
        pic = card.xpath(".//div[@class='module-item-pic']/img/@data-src")[0]
        mark = card.xpath("./div[@class='module-item-text']/text()")[0]
        href = card.xpath(".//div[@class='module-item-pic']/a/@href")[0]
        # The id is the part of the link captured by the pattern.
        video_id = self.regStr(href, "/video/(\\S+).html")
        entry = {
            "vod_id": video_id,
            "vod_name": name,
            "vod_pic": pic,
            "vod_remarks": mark
        }
        videos.append(entry)
    return {'list': videos}
def categoryContent(self, fyclass, fypage):
    """
    Fetch one page of a category listing and parse it into video entries.

    The request URL is built from ``self.url`` by substituting the
    ``fyclass`` and ``fypage`` placeholders. ``self.一级`` supplies five
    ';'-separated parse rules, in order: item list selector, title, image,
    remarks and link.

    :param fyclass: category id (string)
    :param fypage: page number; an int or a str is accepted
    :return: dict with ``list`` (the parsed entries), ``page`` (the page
        number as a string) and ``pagecount``/``limit``/``total``, which
        are fixed constants rather than values read from the page
    """
    pg = str(fypage)
    # str.replace() only accepts strings; passing the raw int page number
    # raised TypeError, so substitute the stringified value.
    url = self.url.replace('fyclass', fyclass).replace('fypage', pg)
    print(url)
    headers = {'user-agent': self.ua}
    r = requests.get(url, headers=headers)
    p = self.一级.split(';')  # parse rules, see docstring for the order
    jsp = jsoup(self.url)
    pdfh = jsp.pdfh
    pdfa = jsp.pdfa
    pd = jsp.pd
    items = pdfa(r.text, p[0])
    videos = []
    for item in items:
        # pd() additionally resolves URL-like attributes against the base URL.
        title = pdfh(item, p[1])
        img = pd(item, p[2])
        desc = pdfh(item, p[3])
        link = pd(item, p[4])
        videos.append({
            "vod_id": link,
            "vod_name": title,
            "vod_pic": img,
            "vod_remarks": desc,
            "vod_content": '',
        })
    result = {
        'list': videos,
        'page': pg,
        'pagecount': 9999,
        'limit': 90,
        'total': 999999,
    }
    return result
if __name__ == '__main__':
    # Manual smoke test: load the rule from its JS file, build a CMS from it
    # and exercise the home and category entry points.
    from utils import parser
    js_path = 'js/鸭奈飞.js'
    ctx, js_code = parser.runJs(js_path)
    rule = ctx.eval('rule')
    cms = CMS(rule)
    print(cms.title)
    print(cms.homeContent())
    cms.categoryContent('dianying',1)
\ No newline at end of file
......@@ -3,7 +3,7 @@
"name":"鸭奈飞",
"type":4,
"api":"http://127.0.0.1:9000/vod?rule=鸭奈飞",
"searchable":1,
"quickSearch":1,
"filterable":0
"searchable": 1,
"quickSearch": 1,
"filterable": 1
}
\ No newline at end of file
此差异已折叠。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File : htmlParser.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/25
from pyquery import PyQuery as pq
from urllib.parse import urljoin
class jsoup:
    """Selector helper built on PyQuery.

    A parse rule is either ``selector`` or ``selector&&option``, where
    ``option`` is ``Text``, ``Html`` or the name of an attribute to read.
    """

    # Attributes whose values are resolved against MY_URL when requested.
    _URL_ATTRS = ('url', 'src', 'href', 'data-original')

    def __init__(self, MY_URL=''):
        """
        :param MY_URL: base URL used by :meth:`pd` to resolve links
        """
        self.MY_URL = MY_URL

    def pdfh(self, html, parse, pd=False):
        """Apply one parse rule to ``html`` and return the extracted value.

        :param html: markup to parse
        :param parse: rule of the form ``selector`` or ``selector&&option``
        :param pd: when true, URL-like attribute values are joined with
            ``self.MY_URL``
        :return: the text, inner HTML or attribute value selected by the
            rule; without an option, the inner HTML of the first match
        """
        doc = pq(html)
        # Split once; only the first two '&&'-separated parts are used.
        parts = parse.split('&&')
        selector = parts[0]
        option = parts[1] if len(parts) > 1 else None
        ret = doc(selector)
        if option:
            if option == 'Text':
                ret = ret.text()
            elif option == 'Html':
                ret = ret.html()
            else:
                ret = ret.attr(option)
                if pd and option in self._URL_ATTRS:
                    ret = urljoin(self.MY_URL, ret)
        else:
            # The previous ret('fisrt') looked up a non-existent <fisrt> tag
            # and therefore always produced None; take the first match.
            ret = ret.eq(0).html()
        return ret

    def pdfa(self, html, parse):
        """Return the string form of every element matching ``parse``.

        :param html: markup to parse
        :param parse: selector for the items
        :return: list of strings, one per matched element
        """
        doc = pq(html)
        return [str(item) for item in doc(parse).items()]

    def pd(self, html, parse):
        """Like :meth:`pdfh`, but resolve URL-like attributes against ``MY_URL``."""
        return self.pdfh(html, parse, True)
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册