Commit f160ead3 authored by H hjdhnx

Upgrade to 3.9.9 and add new features

Parent cbbe21ac
@@ -562,33 +562,53 @@ class CMS:
         if self.double and len(p) < 6:
             return self.blank()
         jsp = jsoup(self.homeUrl)
-        is_json = str(p[0]).startswith('json:')
+        pp = self.一级.split(';')
+        def getPP(p,pn,pp,ppn):
+            ps = pp[ppn] if p[pn] == '*' and len(pp) > ppn else p[pn]
+            return ps
+        p0 = getPP(p,0,pp,0)
+        is_json = str(p0).startswith('json:')
         if is_json:
             html = self.dealJson(html)
         pdfh = jsp.pjfh if is_json else jsp.pdfh
         pdfa = jsp.pjfa if is_json else jsp.pdfa
         pd = jsp.pj if is_json else jsp.pd
         # print(html)
         try:
             if self.double:
-                items = pdfa(html, p[0].replace('json:',''))
+                items = pdfa(html, p0.replace('json:',''))
                 # print(p[0])
                 # print(items)
                 # print(len(items))
                 for item in items:
                     items2 = pdfa(item,p[1])
                     # print(items2)
                     # print(len(items2))
                     for item2 in items2:
                         try:
-                            title = pdfh(item2, p[2])
+                            p2 = getPP(p,2,pp,1)
+                            title = pdfh(item2, p2)
                             # print(title)
                             try:
-                                img = pd(item2, p[3])
+                                p3 = getPP(p,3,pp,2)
+                                img = pd(item2, p3)
                             except:
                                 img = ''
-                            desc = pdfh(item2, p[4])
-                            links = [pd(item2, p5) if not self.detailUrl else pdfh(item2, p5) for p5 in p[5].split('+')]
+                            try:
+                                p4 = getPP(p,4,pp,3)
+                                desc = pdfh(item2, p4)
+                            except:
+                                desc = ''
+                            p5 = getPP(p,5,pp,4)
+                            links = [pd(item2, _p5) if not self.detailUrl else pdfh(item2, _p5) for _p5 in p5.split('+')]
                             link = '$'.join(links)
-                            content = '' if len(p) < 7 else pdfh(item2, p[6])
+                            if len(p) > 6 and p[6]:
+                                p6 = getPP(p,6,pp,5)
+                                content = pdfh(item2, p6)
+                            else:
+                                content = ''
                             videos.append({
                                 "vod_id": link,
                                 "vod_name": title,
@@ -603,17 +623,31 @@ class CMS:
                         except:
                             pass
             else:
-                items = pdfa(html, p[0].replace('json:',''))
+                items = pdfa(html, p0.replace('json:',''))
                 # print(items)
                 for item in items:
                     try:
-                        title = pdfh(item, p[1])
-                        img = pd(item, p[2])
-                        desc = pdfh(item, p[3])
+                        p1 = getPP(p,1,pp,1)
+                        title = pdfh(item, p1)
+                        try:
+                            p2 = getPP(p,2,pp,2)
+                            img = pd(item, p2)
+                        except:
+                            img = ''
+                        try:
+                            p3 = getPP(p,3,pp,3)
+                            desc = pdfh(item, p3)
+                        except:
+                            desc = ''
+                        p4 = getPP(p,4,pp,4)
                         # link = pd(item, p[4])
-                        links = [pd(item, p5) if not self.detailUrl else pdfh(item, p5) for p5 in p[4].split('+')]
+                        links = [pd(item, _p5) if not self.detailUrl else pdfh(item, _p5) for _p5 in p4.split('+')]
                         link = '$'.join(links)
-                        content = '' if len(p) < 6 else pdfh(item, p[5])
+                        if len(p) > 5 and p[5]:
+                            p5 = getPP(p,5,pp,5)
+                            content = pdfh(item, p5)
+                        else:
+                            content = ''
                         videos.append({
                             "vod_id": link,
                             "vod_name": title,
@@ -1096,9 +1130,14 @@ class CMS:
             return self.blank()
         # p = self.一级.split(';') if self.搜索 == '*' and self.一级 else self.搜索.split(';')  # parse rule
         p = self.一级 if self.搜索 == '*' and self.一级 else self.搜索
+        pp = self.一级.split(';')
         jsp = jsoup(self.url)
         videos = []
         is_js = isinstance(p, str) and str(p).startswith('js:')  # is a js rule
+        def getPP(p, pn, pp, ppn):
+            ps = pp[ppn] if p[pn] == '*' and len(pp) > ppn else p[pn]
+            return ps
         if is_js:
             headers['Referer'] = getHome(url)
             py_ctx.update({
@@ -1159,28 +1198,34 @@ class CMS:
                 logger.info('搜索结果源码未包含关键字,疑似搜索失败,正为您打印结果源码')
                 print(html)
-            items = pdfa(html,p[0].replace('json:','',1))
+            p0 = getPP(p,0,pp,0)
+            items = pdfa(html,p0.replace('json:','',1))
             # print(len(items),items)
             videos = []
             for item in items:
                 # print(item)
                 try:
                     # title = pdfh(item, p[1])
-                    title = ''.join([pdfh(item, i) for i in p[1].split('||')])
+                    p1 = getPP(p, 1, pp, 1)
+                    title = ''.join([pdfh(item, i) for i in p1.split('||')])
                     try:
-                        img = pd(item, p[2])
+                        p2 = getPP(p, 2, pp, 2)
+                        img = pd(item, p2)
                     except:
                         img = ''
                     try:
-                        desc = pdfh(item, p[3])
+                        p3 = getPP(p, 3, pp, 3)
+                        desc = pdfh(item, p3)
                     except:
                         desc = ''
-                    try:
-                        content = '' if len(p) < 6 else pdfh(item, p[5])
-                    except:
+                    if len(p) > 5 and p[5]:
+                        p5 = getPP(p, 5, pp, 5)
+                        content = pdfh(item, p5)
+                    else:
                         content = ''
                     # link = '$'.join([pd(item, p4) for p4 in p[4].split('+')])
-                    links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')]
+                    p4 = getPP(p, 4, pp, 4)
+                    links = [pd(item, _p4) if not self.detailUrl else pdfh(item, _p4) for _p4 in p4.split('+')]
                     link = '$'.join(links)
                     # print(content)
                     # sid = self.regStr(sid, "/video/(\\S+).html")
......
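The getPP helper above is the heart of the 3.9.9 change: any 推荐 or 搜索 field written as * falls back to the 一级 field at a mapped position. A minimal standalone sketch of that mapping (the rule strings here are hypothetical, not from the repo):

# Sketch of the '*' inheritance added in 3.9.9. In double mode the 推荐 rule
# carries one extra leading selector, so its fields 2..6 inherit from 一级
# positions 1..5; in single mode and in 搜索, positions line up 1:1.
def getPP(p, pn, pp, ppn):
    return pp[ppn] if p[pn] == '*' and len(pp) > ppn else p[pn]

pp = 'list;title;img;desc;href;content'.split(';')   # hypothetical 一级 rule
double_p = 'outer;list;*;*;*;*;*'.split(';')         # hypothetical double-mode 推荐
single_p = 'list;*;*;*;*;*'.split(';')               # hypothetical 搜索 / single 推荐

assert getPP(double_p, 2, pp, 1) == 'title'    # double mode: shifted by one
assert getPP(single_p, 1, pp, 1) == 'title'    # single mode: aligned 1:1
assert getPP(single_p, 5, pp, 5) == 'content'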
var rule={
title:'if101',
host:'https://www.oulevod.tv',
// homeUrl:'/',
url:'/index.php/vod/show/id/fyclass/page/fypage.html',
searchUrl:'/index.php/vod/search.html?wd=**',
    searchable:2,// enable global search
    quickSearch:0,// enable quick search
    filterable:0,// enable category filtering
    headers:{// request headers for the site; all standard headers supported, usually UA and cookies
'User-Agent':'MOBILE_UA',
// "Cookie": "searchneed=ok"
},
class_parse:'.conch-nav&&ul&&li;a&&Text;a&&href;./(\\d+).html',
cate_exclude:'',
play_parse:true,
lazy:'',
limit:6,
推荐:'body&&.hl-list-wrap;ul&&li;a&&title;.hl-lazy&&data-original;.hl-pic-text&&Text;a&&href',
    double:true, // whether the 推荐 list uses two-level positioning
一级:'.hl-vod-list&&li;a&&title;.hl-lazy&&data-original;.hl-pic-text&&Text;a&&href',
二级:{"title":".hl-dc-title&&Text;.hl-col-xs-12&&em&&.hl-text-muted:eq(0)&&Text","img":".hl-lazy&&data-original","desc":".hl-col-xs-12&&em&&.hl-text-muted:eq(-2)&&Text;.hl-col-xs-12&&em&&.hl-text-muted:eq(1)&&Text;.hl-col-xs-12&&em&&.hl-text-muted:eq(2)&&Text","content":".hl-content-text&&Text","tabs":".hl-plays-wrap","lists":".hl-plays-list:eq(#id) li"},
搜索:'.hl-list-wrap&&ul&&li;h3&&Text;.hl-lazy&&data-original;.hl-item-title:eq(0)&&Text;a&&href',
}
\ No newline at end of file
-3.9.8beta2
\ No newline at end of file
+3.9.9
\ No newline at end of file
var rule = {
title:'小品网',
host:'http://www.xiaopin.tv',
url:'/?cate=fyclass&page=fypage',
searchUrl:'/search.php?q=**',
searchable:2,
quickSearch:0,
filterable:0,
headers:{
'User-Agent':'MOBILE_UA'
},
timeout:5000,
class_name:'小品&相声&二人转&春晚小品',
class_url:'2&1&3&14',
play_parse:true,
lazy:'',
limit:6,
double:true,
推荐:'.itemlist;ul&&li;.itemname&&a&&Text;.itemimg img&&src;*;*',
一级:'.catecon&&ul&&li;.catename&&a&&Text;img&&src;.icoplay&&Text;a&&href',
二级:'*',
搜索:'*',
}
\ No newline at end of file
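This rule exercises the new shorthand: the last two 推荐 fields are *, and 二级/搜索 are * outright (the Python side resolves a bare 搜索:'*' by substituting the whole 一级 rule, per the searchContent hunk above). A sketch of how the 推荐 fields resolve against 一级 under the double-mode mapping, with getPP copied from the commit and the surrounding script purely illustrative:

# Resolve 小品网's shorthand 推荐 rule against its 一级 rule.
yiji = '.catecon&&ul&&li;.catename&&a&&Text;img&&src;.icoplay&&Text;a&&href'.split(';')
tuijian = '.itemlist;ul&&li;.itemname&&a&&Text;.itemimg img&&src;*;*'.split(';')

def getPP(p, pn, pp, ppn):
    return pp[ppn] if p[pn] == '*' and len(pp) > ppn else p[pn]

# double-mode mapping: 推荐 index i inherits from 一级 index i-1
resolved = tuijian[:2] + [getPP(tuijian, i, yiji, i - 1) for i in range(2, 6)]
print(';'.join(resolved))
# .itemlist;ul&&li;.itemname&&a&&Text;.itemimg img&&src;.icoplay&&Text;a&&href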
@@ -11,14 +11,15 @@ var rule={
         'User-Agent':'MOBILE_UA',
         // "Cookie": "searchneed=ok"
     },
-    class_name:'电影&连续剧&综艺&动漫',
-    class_url:'dianying&lianxuju&zongyi&dongman',
+    class_parse:'nav ul li;a&&Text;a&&href;.*/(.*?)\.html',
+    cate_exclude:'专题',
     play_parse:true,
     lazy:'',
     limit:6,
-    推荐:'.show;ul&&li;a&&title;img&&src;.score&&Text;a&&href',
+    推荐:'.show;ul&&li;*;*;*;*',
     double:true, // whether the 推荐 list uses two-level positioning
-    一级:'.row&&li;a&&title;img&&src;.score&&Text;a&&href',
+    一级:'.show&&ul&&li;a&&title;img&&src;.score&&Text;a&&href',
     二级:{"title":"h1&&Text;.info&&ul&&p&&Text","img":"img&&src","desc":".info&&ul&&p:eq(-2)&&Text;.info&&ul&&p:eq(-1)&&Text;.info&&ul&&p:eq(0)&&Text;.info&&ul&&p:eq(1)&&Text;.info&&ul&&p:eq(2)&&Text;.info&&ul&&p:eq(3)&&Text","content":".text&&Text","tabs":".play&&span","lists":".playlist&&ul:eq(#id) li"},
-    搜索:'.row&&li;a&&title;img&&src;.score&&Text;a&&href',
+    搜索:'*',
+    // 搜索:'*;*;*;*;*',
 }
\ No newline at end of file
@@ -1054,6 +1054,19 @@ function homeParse(homeObj) {
 }
+/**
+ * Let a single 推荐/搜索 field inherit from the 一级 rule
+ * @param p the 推荐 or 搜索 rule split on ';'
+ * @param pn index of the field within p
+ * @param pp the 一级 rule split on ';'
+ * @param ppn index within pp to inherit from
+ * @returns {*}
+ */
+function getPP(p, pn, pp, ppn){
+    let ps = p[pn] === '*' && pp.length > ppn ? pp[ppn] : p[pn]
+    return ps
+}
 /**
  * Parse the home-page recommended list
  * @param homeVodObj
@@ -1074,6 +1087,7 @@ function homeVodParse(homeVodObj){
         return '{}'
     }
     p = p.trim();
+    let pp = rule.一级.split(';');
     if(p.startsWith('js:')){
         const TYPE = 'home';
         var input = MY_URL;
@@ -1087,12 +1101,13 @@ function homeVodParse(homeVodObj){
     } else if (homeVodObj.double && p.length < 6) {
         return '{}'
     }
-    let _ps = parseTags.getParse(p[0]);
+    let p0 = getPP(p,0,pp,0)
+    let _ps = parseTags.getParse(p0);
     _pdfa = _ps.pdfa;
     _pdfh = _ps.pdfh;
     _pd = _ps.pd;
-    let is_json = p[0].startsWith('json:');
-    p[0] = p[0].replace(/^(jsp:|json:|jq:)/,'');
+    let is_json = p0.startsWith('json:');
+    p0 = p0.replace(/^(jsp:|json:|jq:)/,'');
     // print(p[0]);
     let html = homeHtmlCache || getHtml(MY_URL);
     homeHtmlCache = undefined;
@@ -1103,7 +1118,7 @@ function homeVodParse(homeVodObj){
     try {
         console.log('double:' + homeVodObj.double);
         if (homeVodObj.double) {
-            let items = _pdfa(html, p[0]);
+            let items = _pdfa(html, p0);
             // console.log(items.length);
             for (let item of items) {
                 // console.log(p[1]);
@@ -1111,22 +1126,36 @@
                 // console.log(items2.length);
                 for (let item2 of items2) {
                     try {
-                        let title = _pdfh(item2, p[2]);
+                        let p2 = getPP(p,2,pp,1);
+                        let title = _pdfh(item2, p2);
                         let img = '';
                         try {
-                            img = _pd(item2, p[3])
-                        } catch (e) {
-                        }
-                        let desc = _pdfh(item2, p[4]);
+                            let p3 = getPP(p,3,pp,2);
+                            img = _pd(item2, p3);
+                        } catch (e) {}
+                        let desc = '';
+                        try {
+                            let p4 = getPP(p,4,pp,3);
+                            desc = _pdfh(item2, p4);
+                        } catch (e) {}
+                        let p5 = getPP(p,5,pp,4);
                         let links = [];
-                        for (let p5 of p[5].split('+')) {
-                            let link = !homeVodObj.detailUrl ? _pd(item2, p5, MY_URL) : _pdfh(item2, p5);
+                        for (let _p5 of p5.split('+')) {
+                            let link = !homeVodObj.detailUrl ? _pd(item2, _p5, MY_URL) : _pdfh(item2, _p5);
                             links.push(link);
                         }
+                        let content;
+                        if (p.length > 6 && p[6]) {
+                            let p6 = getPP(p,6,pp,5);
+                            content = _pdfh(item2, p6);
+                        } else {
+                            content = '';
+                        }
                         let vod = {
                             vod_name: title,
                             vod_pic: img,
                             vod_remarks: desc,
+                            vod_content: content,
                             vod_id: links.join('$')
                         };
                         // print(vod);
@@ -1142,26 +1171,39 @@ function homeVodParse(homeVodObj){
         } else {
-            let items = _pdfa(html, p[0]);
+            let items = _pdfa(html, p0);
             for (let item of items) {
                 try {
-                    let title = _pdfh(item, p[1]);
+                    let p1 = getPP(p,1,pp,1);
+                    let title = _pdfh(item, p1);
                     let img = '';
                     try {
-                        img = _pd(item, p[2], MY_URL);
-                    } catch (e) {
-                    }
-                    let desc = _pdfh(item, p[3]);
+                        let p2 = getPP(p,2,pp,2);
+                        img = _pd(item, p2, MY_URL);
+                    } catch (e) {}
+                    let desc = '';
+                    try {
+                        let p3 = getPP(p,3,pp,3);
+                        desc = _pdfh(item, p3);
+                    } catch (e) {}
+                    let p4 = getPP(p,4,pp,4);
                     let links = [];
-                    for (let p5 of p[4].split('+')) {
-                        let link = !homeVodObj.detailUrl ? _pd(item, p5, MY_URL) : _pdfh(item, p5);
+                    for (let _p5 of p4.split('+')) {
+                        let link = !homeVodObj.detailUrl ? _pd(item, _p5, MY_URL) : _pdfh(item, _p5);
                         links.push(link);
                     }
+                    let content;
+                    if (p.length > 5 && p[5]) {
+                        let p5 = getPP(p,5,pp,5);
+                        content = _pdfh(item, p5);
+                    } else {
+                        content = ''
+                    }
                     let vod = {
                         vod_name: title,
                         vod_pic: img,
                         vod_remarks: desc,
+                        vod_content: content,
                         vod_id: links.join('$')
                     };
                     d.push(vod);
@@ -1308,6 +1350,7 @@ function searchParse(searchObj) {
         return '{}'
     }
     p = p.trim();
+    let pp = rule.一级.split(';');
     let url = searchObj.searchUrl.replaceAll('**', searchObj.wd).replaceAll('fypage', searchObj.pg);
     MY_URL = url;
     console.log(MY_URL);
@@ -1325,12 +1368,13 @@
     if (p.length < 5) {
         return '{}'
     }
-    let _ps = parseTags.getParse(p[0]);
+    let p0 = getPP(p,0,pp,0);
+    let _ps = parseTags.getParse(p0);
     _pdfa = _ps.pdfa;
     _pdfh = _ps.pdfh;
     _pd = _ps.pd;
-    let is_json = p[0].startsWith('json:');
-    p[0] = p[0].replace(/^(jsp:|json:|jq:)/,'');
+    let is_json = p0.startsWith('json:');
+    p0 = p0.replace(/^(jsp:|json:|jq:)/,'');
     try {
         let html = getHtml(MY_URL);
         if (html) {
@@ -1352,22 +1396,30 @@
             if(is_json){
                 html = dealJson(html);
             }
-            let list = _pdfa(html, p[0]);
+            let list = _pdfa(html, p0);
             list.forEach(it => {
-                let links = p[4].split('+').map(p4=>{
-                    return !rule.detailUrl?_pd(it, p4,MY_URL):_pdfh(it, p4)
+                let p1 = getPP(p, 1, pp, 1);
+                let p2 = getPP(p, 2, pp, 2);
+                let p3 = getPP(p, 3, pp, 3);
+                let p4 = getPP(p, 4, pp, 4);
+                let links = p4.split('+').map(_p4=>{
+                    return !rule.detailUrl?_pd(it, _p4,MY_URL):_pdfh(it, _p4)
                 });
                 let link = links.join('$');
+                let content;
+                if(p.length > 5 && p[5]){
+                    let p5 = getPP(p,5,pp,5);
+                    content = _pdfh(it, p5);
+                }else{
+                    content = '';
+                }
                 let ob = {
                     'vod_id': link,
-                    'vod_name': _pdfh(it, p[1]).replace(/\n|\t/g,'').trim(),
-                    'vod_pic': _pd(it, p[2],MY_URL),
-                    'vod_remarks': _pdfh(it, p[3]).replace(/\n|\t/g,'').trim(),
+                    'vod_name': _pdfh(it, p1).replace(/\n|\t/g,'').trim(),
+                    'vod_pic': _pd(it, p2,MY_URL),
+                    'vod_remarks': _pdfh(it, p3).replace(/\n|\t/g,'').trim(),
+                    'vod_content': content.replace(/\n|\t/g,'').trim(),
                 };
-                if (p.length > 5 && p[5]) {
-                    ob.vod_content = _pdfh(it, p[5]);
-                }
                 d.push(ob);
             });
@@ -1375,7 +1427,6 @@
         } catch (e) {
             return '{}'
         }
-    }
     return JSON.stringify({
         'page': parseInt(searchObj.pg),
......
This diff has been collapsed.
@@ -49,6 +49,8 @@
[Getting local device info](https://m.jb51.net/article/140716.htm)
###### 2022/10/17
- [X] 3.9.8beta2 Globally disabled SSL certificate verification for HTTPS
- [X] 3.9.9 Added new features: shorthand rules for the 快看 source, and fixed the 小品网 recommended-list display
- [X] New: 推荐 and 搜索 rules can use * in a position to inherit the 一级 selector at the corresponding position (list, title, image, description, detail links, content, etc.)
###### 2022/10/16
- [X] 3.9.8 Finished ddys episode selection and sniff-free playback (sadly my IP got banned right after, before I had played even two shows)
- [X] 3.9.8beta1 Added two self-built JS resolvers
......
-#coding=utf-8
-#!/usr/bin/python
+# coding=utf-8
+# !/usr/bin/python
import sys
sys.path.append('..')
from base.spider import Spider
import json
import base64
from requests import session, utils
from Crypto.Cipher import AES
class Spider(Spider):  # metaclass note: the default metaclass is type
def getName(self):
return "厂长资源"
def init(self,extend=""):
print("============{0}============".format(extend))
pass
def homeContent(self,filter):
result = {}
cateManual = {
"豆瓣电影Top250": "dbtop250",
"最新电影": "zuixindianying",
"电视剧": "dsj",
"国产剧": "gcj",
"美剧": "meijutt",
"韩剧": "hanjutv",
"番剧": "fanju",
"动漫": "dm"
}
classes = []
for k in cateManual:
classes.append({
'type_name':k,
'type_id':cateManual[k]
})
result['class'] = classes
return result
def homeVideoContent(self):
rsp = self.fetch("https://czspp.com")
root = self.html(rsp.text)
aList = root.xpath("//div[@class='mi_btcon']//ul/li")
videos = []
for a in aList:
name = a.xpath('./a/img/@alt')[0]
pic = a.xpath('./a/img/@data-original')[0]
mark = a.xpath("./div[@class='hdinfo']/span/text()")[0]
sid = a.xpath("./a/@href")[0]
sid = self.regStr(sid,"/movie/(\\S+).html")
videos.append({
"vod_id":sid,
"vod_name":name,
"vod_pic":pic,
"vod_remarks":mark
})
result = {
'list':videos
}
return result
def categoryContent(self,tid,pg,filter,extend):
result = {}
url = 'https://czspp.com/{0}/page/{1}'.format(tid,pg)
rsp = self.fetch(url)
root = self.html(rsp.text)
aList = root.xpath("//div[contains(@class,'mi_cont')]//ul/li")
videos = []
for a in aList:
name = a.xpath('./a/img/@alt')[0]
pic = a.xpath('./a/img/@data-original')[0]
mark = a.xpath("./div[@class='hdinfo']/span/text()")[0]
sid = a.xpath("./a/@href")[0]
sid = self.regStr(sid,"/movie/(\\S+).html")
videos.append({
"vod_id":sid,
"vod_name":name,
"vod_pic":pic,
"vod_remarks":mark
})
def getName(self):
return "厂长资源"
result['list'] = videos
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
return result
def detailContent(self,array):
tid = array[0]
url = 'https://czspp.com/movie/{0}.html'.format(tid)
rsp = self.fetch(url)
root = self.html(rsp.text)
node = root.xpath("//div[@class='dyxingq']")[0]
def init(self, extend=""):
print("============{0}============".format(extend))
pass
pic = node.xpath(".//div[@class='dyimg fl']/img/@src")[0]
title = node.xpath('.//h1/text()')[0]
detail = root.xpath(".//div[@class='yp_context']//p/text()")[0]
def homeContent(self, filter):
result = {}
cateManual = {
"豆瓣电影Top250": "dbtop250",
"最新电影": "zuixindianying",
"电视剧": "dsj",
"国产剧": "gcj",
"美剧": "meijutt",
"韩剧": "hanjutv",
"番剧": "fanju",
"动漫": "dm"
}
classes = []
for k in cateManual:
classes.append({
'type_name': k,
'type_id': cateManual[k]
})
result['class'] = classes
return result
vod = {
"vod_id":tid,
"vod_name":title,
"vod_pic":pic,
"type_name":"",
"vod_year":"",
"vod_area":"",
"vod_remarks":"",
"vod_actor":"",
"vod_director":"",
"vod_content":detail
}
def homeVideoContent(self):
url = "https://czspp.com"
if len(self.cookies) <= 0:
self.getCookie(url)
url = url + self.zid
rsp = self.fetch(url)
root = self.html(self.cleanText(rsp.text))
aList = root.xpath("//div[@class='mi_btcon']//ul/li")
videos = []
for a in aList:
name = a.xpath('./a/img/@alt')[0]
pic = a.xpath('./a/img/@data-original')[0]
mark = a.xpath("./div[@class='hdinfo']/span/text()")[0]
sid = a.xpath("./a/@href")[0]
sid = self.regStr(sid, "/movie/(\\S+).html")
videos.append({
"vod_id": sid,
"vod_name": name,
"vod_pic": pic,
"vod_remarks": mark
})
result = {
'list': videos
}
return result
infoArray = node.xpath(".//ul[@class='moviedteail_list']/li")
for info in infoArray:
content = info.xpath('string(.)')
if content.startswith('类型'):
vod['type_name'] = content
if content.startswith('年份'):
vod['vod_year'] = content
if content.startswith('地区'):
vod['vod_area'] = content
if content.startswith('豆瓣'):
vod['vod_remarks'] = content
if content.startswith('主演'):
vod['vod_actor'] = content
if content.startswith('导演'):
vod['vod_director'] = content
# if content.startswith('剧情'):
# vod['vod_content'] = content
header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"}
cookies = ''
def getCookie(self,url):
rsp = self.fetch(url,headers=self.header)
baseurl = self.regStr(reg=r'(https://.*?/)', src=url)
append = url.replace(baseurl,'')
zid = self.regStr(rsp.text, "{0}(\\S+)\"".format(append))
self.zid = zid
self.cookies = rsp.cookies
if 'btwaf' not in zid:
zid = ''
return rsp.cookies, zid
vod_play_from = '$$$'
playFrom = ['厂长']
vod_play_from = vod_play_from.join(playFrom)
vod_play_url = '$$$'
playList = []
vodList = root.xpath("//div[@class='paly_list_btn']")
for vl in vodList:
vodItems = []
aList = vl.xpath('./a')
for tA in aList:
href = tA.xpath('./@href')[0]
name = tA.xpath('./text()')[0]
tId = self.regStr(href,'/v_play/(\\S+).html')
vodItems.append(name + "$" + tId)
joinStr = '#'
joinStr = joinStr.join(vodItems)
playList.append(joinStr)
vod_play_url = vod_play_url.join(playList)
def categoryContent(self, tid, pg, filter, extend):
result = {}
url = 'https://czspp.com/{0}/page/{1}'.format(tid,pg)
if len(self.cookies) <= 0:
self.getCookie(url)
url = url + self.zid
rsp = self.fetch(url, cookies=self.cookies,headers=self.header)
root = self.html(self.cleanText(rsp.text))
aList = root.xpath("//div[contains(@class,'bt_img mi_ne_kd mrb')]/ul/li")
videos = []
for a in aList:
name = a.xpath('./a/img/@alt')[0]
pic = a.xpath('./a/img/@data-original')[0]
mark = a.xpath("./div[@class='hdinfo']/span/text()")[0]
sid = a.xpath("./a/@href")[0]
sid = self.regStr(sid, "/movie/(\\S+).html")
videos.append({
"vod_id": sid,
"vod_name": name,
"vod_pic": pic,
"vod_remarks": mark
})
result['list'] = videos
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
return result
vod['vod_play_from'] = vod_play_from
vod['vod_play_url'] = vod_play_url
def detailContent(self, array):
tid = array[0]
url = 'https://czspp.com/movie/{0}.html'.format(tid)
if len(self.cookies) <= 0:
self.getCookie(url)
url = url + self.zid
rsp = self.fetch(url,cookies=self.cookies,headers=self.header)
root = self.html(self.cleanText(rsp.text))
node = root.xpath("//div[@class='dyxingq']")[0]
pic = node.xpath(".//div[@class='dyimg fl']/img/@src")[0]
title = node.xpath('.//h1/text()')[0]
detail = root.xpath(".//div[@class='yp_context']//p/text()")[0]
vod = {
"vod_id": tid,
"vod_name": title,
"vod_pic": pic,
"type_name": "",
"vod_year": "",
"vod_area": "",
"vod_remarks": "",
"vod_actor": "",
"vod_director": "",
"vod_content": detail
}
infoArray = node.xpath(".//ul[@class='moviedteail_list']/li")
for info in infoArray:
content = info.xpath('string(.)')
if content.startswith('地区'):
tpyeare = ''
for inf in info:
tn = inf.text
tpyeare = tpyeare +'/'+'{0}'.format(tn)
vod['vod_area'] = tpyeare.strip('/')
if content.startswith('年份'):
vod['vod_year'] = content.replace("年份:","")
if content.startswith('主演'):
tpyeact = ''
for inf in info:
tn = inf.text
tpyeact = tpyeact +'/'+'{0}'.format(tn)
vod['vod_actor'] = tpyeact.strip('/')
if content.startswith('导演'):
tpyedire = ''
for inf in info:
tn = inf.text
tpyedire = tpyedire +'/'+'{0}'.format(tn)
vod['vod_director'] = tpyedire .strip('/')
vod_play_from = '$$$'
playFrom = ['厂长']
vod_play_from = vod_play_from.join(playFrom)
vod_play_url = '$$$'
playList = []
vodList = root.xpath("//div[@class='paly_list_btn']")
for vl in vodList:
vodItems = []
aList = vl.xpath('./a')
for tA in aList:
href = tA.xpath('./@href')[0]
name = tA.xpath('./text()')[0].replace('\xa0','')
tId = self.regStr(href, '/v_play/(\\S+).html')
vodItems.append(name + "$" + tId)
joinStr = '#'
joinStr = joinStr.join(vodItems)
playList.append(joinStr)
vod_play_url = vod_play_url.join(playList)
result = {
'list':[
vod
]
}
return result
vod['vod_play_from'] = vod_play_from
vod['vod_play_url'] = vod_play_url
result = {
'list': [
vod
]
}
return result
def searchContent(self,key,quick):
url = 'https://czspp.com/xssearch?q={0}'.format(key)
# getHeader()
rsp = self.fetch(url)
root = self.html(rsp.text)
def searchContent(self, key, quick):
url = 'https://czspp.com/xssearch?q={0}'.format(key)
if len(self.cookies) <= 0:
self.getCookie(url)
url = url + self.zid
rsp = self.fetch(url,cookies=self.cookies,headers=self.header)
root = self.html(self.cleanText(rsp.text))
vodList = root.xpath("//div[contains(@class,'mi_ne_kd')]/ul/li/a")
videos = []
for vod in vodList:
name = vod.xpath('./img/@alt')[0]
pic = vod.xpath('./img/@data-original')[0]
href = vod.xpath('./@href')[0]
tid = self.regStr(href, 'movie/(\\S+).html')
res = vod.xpath('./div[@class="jidi"]/span/text()')
if len(res) == 0:
remark = '全1集'
else:
remark = vod.xpath('./div[@class="jidi"]/span/text()')[0]
videos.append({
"vod_id": tid,
"vod_name": name,
"vod_pic": pic,
"vod_remarks": remark
})
result = {
'list': videos
}
return result
config = {
"player": {},
"filter": {}
}
header = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
}
def parseCBC(self, enc, key, iv):
keyBytes = key.encode("utf-8")
ivBytes = iv.encode("utf-8")
cipher = AES.new(keyBytes, AES.MODE_CBC, ivBytes)
msg = cipher.decrypt(enc)
paddingLen = msg[len(msg) - 1]
return msg[0:-paddingLen]
result = {}
vodList = root.xpath("//div[contains(@class,'mi_ne_kd')]/ul/li/a")
videos = []
for vod in vodList:
name = vod.xpath('./img/@alt')[0]
pic = vod.xpath('./img/@data-original')[0]
href = vod.xpath('./@href')[0]
tid = self.regStr(href,'movie/(\\S+).html')
remark = ""
videos.append({
"vod_id": tid,
"vod_name": name,
"vod_pic": pic,
"vod_remarks": remark
})
result = {
'list':videos
}
return result
def playerContent(self, flag, id, vipFlags):
result = {}
url = 'https://czspp.com/v_play/{0}.html'.format(id)
if len(self.cookies) <= 0:
self.getCookie(url)
url = url + self.zid
pat = '\\"([^\\"]+)\\";var [\\d\\w]+=function dncry.*md5.enc.Utf8.parse\\(\\"([\\d\\w]+)\\".*md5.enc.Utf8.parse\\(([\\d]+)\\)'
rsp = self.fetch(url,cookies=self.cookies,headers=self.header)
html = rsp.text
content = self.regStr(html, pat)
if content == '':
str3 = url
pars = 1
header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}
else:
key = self.regStr(html, pat, 2)
iv = self.regStr(html, pat, 3)
decontent = self.parseCBC(base64.b64decode(content), key, iv).decode()
urlPat = 'video: \\{url: \\\"([^\\\"]+)\\\"'
vttPat = 'subtitle: \\{url:\\\"([^\\\"]+\\.vtt)\\\"'
str3 = self.regStr(decontent, urlPat)
str4 = self.regStr(decontent, vttPat)
self.loadVtt(str3)
pars = 0
header = ''
if len(str4) > 0:
result['subf'] = '/vtt/utf-8'
result['subt'] = ''
result = {
'parse': pars,
'playUrl': '',
'url': str3,
'header': header
}
return result
config = {
"player": { },
"filter": { }
}
header = {
"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
}
def parseCBC(self, enc, key, iv):
keyBytes = key.encode("utf-8")
ivBytes = iv.encode("utf-8")
cipher = AES.new(keyBytes, AES.MODE_CBC, ivBytes)
msg = cipher.decrypt(enc)
paddingLen = msg[len(msg)-1]
return msg[0:-paddingLen]
def loadVtt(self, url):
pass
def playerContent(self,flag,id,vipFlags):
url = 'https://czspp.com/v_play/{0}.html'.format(id)
pat = '\\"([^\\"]+)\\";var [\\d\\w]+=function dncry.*md5.enc.Utf8.parse\\(\\"([\\d\\w]+)\\".*md5.enc.Utf8.parse\\(([\\d]+)\\)'
rsp = self.fetch(url)
def isVideoFormat(self, url):
pass
html = rsp.text
content = self.regStr(html,pat)
key = self.regStr(html,pat,2)
iv = self.regStr(html,pat,3)
decontent = self.parseCBC(base64.b64decode(content),key,iv).decode()
def manualVideoCheck(self):
pass
urlPat = 'video: \\{url: \\\"([^\\\"]+)\\\"'
vttPat = 'subtitle: \\{url:\\\"([^\\\"]+\\.vtt)\\\"'
str3 = self.regStr(decontent,urlPat)
str4 = self.regStr(decontent,vttPat)
self.loadVtt(str3)
result = {
'parse':'0',
'playUrl':'',
'url':str3,
'header':''
}
if len(str4) > 0:
result['subf'] = '/vtt/utf-8'
# result['subt'] = Proxy.localProxyUrl() + "?do=czspp&url=" + URLEncoder.encode(str4)
result['subt'] = ''
return result
def loadVtt(self,url):
print(url)
def isVideoFormat(self,url):
pass
def manualVideoCheck(self):
pass
def localProxy(self,param):
action = {}
return [200, "video/MP2T", action, ""]
\ No newline at end of file
def localProxy(self, param):
action = {}
return [200, "video/MP2T", action, ""]
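The playerContent path above pulls an AES key and IV out of the page with regStr, base64-decodes the payload, and hands it to parseCBC, which strips PKCS#7 padding by reading the last byte. A self-contained round-trip sketch of that helper (pycryptodome; the key, IV, and plaintext are made-up sample values, not scraped ones):

import base64
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad

def parseCBC(enc, key, iv):
    # Same logic as the spider's helper: AES-CBC decrypt, then drop PKCS#7 padding.
    cipher = AES.new(key.encode("utf-8"), AES.MODE_CBC, iv.encode("utf-8"))
    msg = cipher.decrypt(enc)
    paddingLen = msg[len(msg) - 1]   # PKCS#7: the last byte is the pad length
    return msg[0:-paddingLen]

key, iv = "0123456789abcdef", "abcdef0123456789"   # hypothetical 16-byte values
plain = b'video: {url: "https://example.com/v.m3u8"}'
enc = AES.new(key.encode(), AES.MODE_CBC, iv.encode()).encrypt(pad(plain, AES.block_size))
assert parseCBC(base64.b64decode(base64.b64encode(enc)), key, iv) == plain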
#coding=utf-8
#!/usr/bin/python
import sys
sys.path.append('..')
from base.spider import Spider
import re
import math
class Spider(Spider):
def getName(self):
return "体育直播"
def init(self,extend=""):
pass
def isVideoFormat(self,url):
pass
def manualVideoCheck(self):
pass
def homeContent(self,filter):
result = {}
cateManual = {
"全部": ""
}
classes = []
for k in cateManual:
classes.append({
'type_name': k,
'type_id': cateManual[k]
})
result['class'] = classes
if (filter):
result['filters'] = self.config['filter']
return result
def homeVideoContent(self):
result = {}
return result
def categoryContent(self,tid,pg,filter,extend):
result = {}
url = 'https://m.jrskbs.com'
rsp = self.fetch(url)
html = self.html(rsp.text)
aList = html.xpath("//div[contains(@class, 'contentList')]/a")
videos = []
numvL = len(aList)
pgc = math.ceil(numvL/15)
for a in aList:
aid = a.xpath("./@href")[0]
aid = self.regStr(reg=r'/live/(.*?).html', src=aid)
img = a.xpath(".//div[@class='contentLeft']/p/img/@src")[0]
home = a.xpath(".//div[@class='contentLeft']/p[@class='false false']/text()")[0]
away = a.xpath(".//div[@class='contentRight']/p[@class='false false']/text()")[0]
rmList = a.xpath(".//div[@class='contentCenter']/p/text()")
remark = rmList[1].replace('|','').replace(' ','') + '|' + rmList[0]
videos.append({
"vod_id": aid,
"vod_name": home + 'vs' + away,
"vod_pic": img,
"vod_remarks": remark
})
result['list'] = videos
result['page'] = pg
result['pagecount'] = pgc
result['limit'] = numvL
result['total'] = numvL
return result
def detailContent(self,array):
aid = array[0]
url = "http://m.jrskbs.com/live/{0}.html".format(aid)
rsp = self.fetch(url)
root = self.html(rsp.text)
divContent = root.xpath("//div[@class='today']")[0]
home = divContent.xpath(".//p[@class='onePlayer homeTeam']/text()")[0]
away = divContent.xpath(".//div[3]/text()")[0].strip()
title = home + 'vs' + away
pic = divContent.xpath(".//img[@class='gameLogo1 homeTeam_img']/@src")[0]
typeName = divContent.xpath(".//div/p[@class='name1 matchTime_wap']/text()")[0]
remark = divContent.xpath(".//div/p[@class='time1 matchTitle']/text()")[0].replace(' ','')
vod = {
"vod_id": aid,
"vod_name": title,
"vod_pic": pic,
"type_name": typeName,
"vod_year": "",
"vod_area": "",
"vod_remarks": remark,
"vod_actor": '',
"vod_director":'',
"vod_content": ''
}
urlList = root.xpath("//div[@class='liveshow']/a")
playUrl = ''
for url in urlList:
name = url.xpath("./text()")[0]
purl = url.xpath("./@data-url")[0]
playUrl =playUrl + '{0}${1}#'.format(name, purl)
vod['vod_play_from'] = '体育直播'
vod['vod_play_url'] = playUrl
result = {
'list': [
vod
]
}
return result
def searchContent(self,key,quick):
result = {}
return result
def playerContent(self,flag,id,vipFlags):
result = {}
url = id
if '04stream' in url:
rsp = self.fetch(url)
html = rsp.text
strList = re.findall(r"eval\((.*?)\);", html)
fuctList = strList[1].split('+')
scrpit = ''
for fuc in fuctList:
if fuc.endswith(')'):
append = fuc.split(')')[-1]
else:
append = ''
Unicode = int(self.regStr(reg=r'l\((.*?)\)', src=fuc))
char = chr(Unicode % 256)
char = char + append
scrpit = scrpit + char
par = self.regStr(reg=r'/(.*)/', src=scrpit).replace(')', '')
pars = par.split('/')
infoList = strList[2].split('+')
str = ''
for info in infoList:
if info.startswith('O'):
Unicode = int(int(self.regStr(reg=r'O\((.*?)\)', src=info)) / int(pars[0]) / int(pars[1]))
char = chr(Unicode % 256)
str = str + char
purl = self.regStr(reg=r"play_url=\'(.*?)\'", src=str)
result["parse"] = 0
elif 'v.stnye.cc' in url:
purl = id
result["parse"] = 1
elif 'dplayer' in url:
url = 'https://m.jrskbs.com' + url
rsp = self.fetch(url)
purl = self.regStr(reg=r'var PlayUrl = \"(.*?)\"', src=rsp.text)
result["parse"] = 0
result["playUrl"] = ''
result["url"] = purl
result["header"] = ''
return result
config = {
"player": {},
"filter": {}
}
header = {}
def localProxy(self,param):
action = {
'url':'',
'header':'',
'param':'',
'type':'string',
'after':''
}
return [200, "video/MP2T", action, ""]
\ No newline at end of file
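For the 04stream branch, playerContent reconstructs an eval-obfuscated player script by turning each l(n) (and later O(n)) token back into a character with chr(n % 256). A toy sketch of that recovery step (the obfuscated input string is invented for illustration):

import re

obfuscated = "l(104)+l(116)+l(116)+l(112)+l(115)+l(314)"   # 314 % 256 == 58 -> ':'
decoded = ''.join(chr(int(n) % 256) for n in re.findall(r'l\((\d+)\)', obfuscated))
print(decoded)  # -> https: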