提交 f160ead3 编写于 作者: H hjdhnx

升级至3.9.9,增加新特性

上级 cbbe21ac
无法预览此类型文件
...@@ -562,33 +562,53 @@ class CMS: ...@@ -562,33 +562,53 @@ class CMS:
if self.double and len(p) < 6: if self.double and len(p) < 6:
return self.blank() return self.blank()
jsp = jsoup(self.homeUrl) jsp = jsoup(self.homeUrl)
is_json = str(p[0]).startswith('json:') pp = self.一级.split(';')
def getPP(p,pn,pp,ppn):
    """Resolve one field of a recommend rule, letting '*' inherit from the 一级 rule.

    p   -- the recommend rule already split on ';'
    pn  -- index of the wanted field inside ``p``
    pp  -- the 一级 (first-level list) rule split on ';'
    ppn -- index of the 一级 field to inherit

    Returns ``pp[ppn]`` when ``p[pn]`` is exactly '*' and ``pp`` has that index,
    otherwise ``p[pn]`` unchanged.
    """
    own = p[pn]
    if own == '*' and len(pp) > ppn:
        return pp[ppn]
    return own
p0 = getPP(p,0,pp,0)
is_json = str(p0).startswith('json:')
if is_json: if is_json:
html = self.dealJson(html) html = self.dealJson(html)
pdfh = jsp.pjfh if is_json else jsp.pdfh pdfh = jsp.pjfh if is_json else jsp.pdfh
pdfa = jsp.pjfa if is_json else jsp.pdfa pdfa = jsp.pjfa if is_json else jsp.pdfa
pd = jsp.pj if is_json else jsp.pd pd = jsp.pj if is_json else jsp.pd
# print(html) # print(html)
try: try:
if self.double: if self.double:
items = pdfa(html, p[0].replace('json:','')) items = pdfa(html, p0.replace('json:',''))
# print(p[0]) # print(p[0])
# print(items) # print(items)
# print(len(items)) # print(len(items))
for item in items: for item in items:
items2 = pdfa(item,p[1]) items2 = pdfa(item,p[1])
# print(items2) # print(len(items2))
for item2 in items2: for item2 in items2:
try: try:
title = pdfh(item2, p[2]) p2 = getPP(p,2,pp,1)
title = pdfh(item2, p2)
# print(title)
try: try:
img = pd(item2, p[3]) p3 = getPP(p,3,pp,2)
img = pd(item2, p3)
except: except:
img = '' img = ''
desc = pdfh(item2, p[4]) try:
links = [pd(item2, p5) if not self.detailUrl else pdfh(item2, p5) for p5 in p[5].split('+')] p4 = getPP(p,4,pp,3)
desc = pdfh(item2, p4)
except:
desc = ''
p5 = getPP(p,5,pp,4)
links = [pd(item2, _p5) if not self.detailUrl else pdfh(item2, _p5) for _p5 in p5.split('+')]
link = '$'.join(links) link = '$'.join(links)
content = '' if len(p) < 7 else pdfh(item2, p[6]) if len(p) > 6 and p[6]:
p6 = getPP(p,6,pp,5)
content = pdfh(item2, p6)
else:
content = ''
videos.append({ videos.append({
"vod_id": link, "vod_id": link,
"vod_name": title, "vod_name": title,
...@@ -603,17 +623,31 @@ class CMS: ...@@ -603,17 +623,31 @@ class CMS:
except: except:
pass pass
else: else:
items = pdfa(html, p[0].replace('json:','')) items = pdfa(html, p0.replace('json:',''))
# print(items) # print(items)
for item in items: for item in items:
try: try:
title = pdfh(item, p[1]) p1 = getPP(p,1,pp,1)
img = pd(item, p[2]) title = pdfh(item, p1)
desc = pdfh(item, p[3]) try:
p2 = getPP(p,2,pp,2)
img = pd(item, p2)
except:
img = ''
try:
p3 = getPP(p,3,pp,3)
desc = pdfh(item, p3)
except:
desc = ''
p4 = getPP(p,4,pp,4)
# link = pd(item, p[4]) # link = pd(item, p[4])
links = [pd(item, p5) if not self.detailUrl else pdfh(item, p5) for p5 in p[4].split('+')] links = [pd(item, _p5) if not self.detailUrl else pdfh(item, _p5) for _p5 in p4.split('+')]
link = '$'.join(links) link = '$'.join(links)
content = '' if len(p) < 6 else pdfh(item, p[5]) if len(p) > 5 and p[5]:
p5 = getPP(p,5,pp,5)
content = pdfh(item, p5)
else:
content = ''
videos.append({ videos.append({
"vod_id": link, "vod_id": link,
"vod_name": title, "vod_name": title,
...@@ -1096,9 +1130,14 @@ class CMS: ...@@ -1096,9 +1130,14 @@ class CMS:
return self.blank() return self.blank()
# p = self.一级.split(';') if self.搜索 == '*' and self.一级 else self.搜索.split(';') # 解析 # p = self.一级.split(';') if self.搜索 == '*' and self.一级 else self.搜索.split(';') # 解析
p = self.一级 if self.搜索 == '*' and self.一级 else self.搜索 p = self.一级 if self.搜索 == '*' and self.一级 else self.搜索
pp = self.一级.split(';')
jsp = jsoup(self.url) jsp = jsoup(self.url)
videos = [] videos = []
is_js = isinstance(p, str) and str(p).startswith('js:') # 是js is_js = isinstance(p, str) and str(p).startswith('js:') # 是js
def getPP(p, pn, pp, ppn):
    """Pick field ``pn`` of the search rule ``p``; a '*' placeholder inherits 一级.

    ``pp`` is the 一级 rule split on ';' and ``ppn`` is the position to inherit
    from it. The placeholder is only substituted when ``pp`` actually has an
    element at ``ppn``; otherwise the literal value of ``p[pn]`` is returned.
    """
    field = p[pn]
    inherits = field == '*' and len(pp) > ppn
    if not inherits:
        return field
    return pp[ppn]
if is_js: if is_js:
headers['Referer'] = getHome(url) headers['Referer'] = getHome(url)
py_ctx.update({ py_ctx.update({
...@@ -1159,28 +1198,34 @@ class CMS: ...@@ -1159,28 +1198,34 @@ class CMS:
logger.info('搜索结果源码未包含关键字,疑似搜索失败,正为您打印结果源码') logger.info('搜索结果源码未包含关键字,疑似搜索失败,正为您打印结果源码')
print(html) print(html)
items = pdfa(html,p[0].replace('json:','',1)) p0 = getPP(p,0,pp,0)
items = pdfa(html,p0.replace('json:','',1))
# print(len(items),items) # print(len(items),items)
videos = [] videos = []
for item in items: for item in items:
# print(item) # print(item)
try: try:
# title = pdfh(item, p[1]) # title = pdfh(item, p[1])
title = ''.join([pdfh(item, i) for i in p[1].split('||')]) p1 = getPP(p, 1, pp, 1)
title = ''.join([pdfh(item, i) for i in p1.split('||')])
try: try:
img = pd(item, p[2]) p2 = getPP(p, 2, pp, 2)
img = pd(item, p2)
except: except:
img = '' img = ''
try: try:
desc = pdfh(item, p[3]) p3 = getPP(p, 3, pp, 3)
desc = pdfh(item, p3)
except: except:
desc = '' desc = ''
try: if len(p) > 5 and p[5]:
content = '' if len(p) < 6 else pdfh(item, p[5]) p5 = getPP(p, 5, pp, 5)
except: content = pdfh(item, p5)
else:
content = '' content = ''
# link = '$'.join([pd(item, p4) for p4 in p[4].split('+')]) # link = '$'.join([pd(item, p4) for p4 in p[4].split('+')])
links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')] p4 = getPP(p, 4, pp, 4)
links = [pd(item, _p4) if not self.detailUrl else pdfh(item, _p4) for _p4 in p4.split('+')]
link = '$'.join(links) link = '$'.join(links)
# print(content) # print(content)
# sid = self.regStr(sid, "/video/(\\S+).html") # sid = self.regStr(sid, "/video/(\\S+).html")
......
// Site rule for 'if101'. The 推荐 / 一级 / 搜索 strings are ';'-separated selector lists
// that the home / search parsers split and read by position.
var rule={
title:'if101',
host:'https://www.oulevod.tv',
// homeUrl:'/',
url:'/index.php/vod/show/id/fyclass/page/fypage.html',
// '**' is replaced with the search keyword (and 'fypage' with the page number) by searchParse.
searchUrl:'/index.php/vod/search.html?wd=**',
searchable:2,// whether global search is enabled
quickSearch:0,// whether quick search is enabled
filterable:0,// whether category filtering is enabled
headers:{// request headers for the site; any header is supported, usually UA and cookies
'User-Agent':'MOBILE_UA',
// "Cookie": "searchneed=ok"
},
class_parse:'.conch-nav&&ul&&li;a&&Text;a&&href;./(\\d+).html',
cate_exclude:'',
play_parse:true,
lazy:'',
limit:6,
// With double:true the recommend rule is read as:
// outer list; inner list; title; image; description; link(s) [; content]
推荐:'body&&.hl-list-wrap;ul&&li;a&&title;.hl-lazy&&data-original;.hl-pic-text&&Text;a&&href',
double:true, // whether the recommend list is located in two levels (outer list, then inner list)
// First-level list rule, read as: list; title; image; description; link(s) [; content]
一级:'.hl-vod-list&&li;a&&title;.hl-lazy&&data-original;.hl-pic-text&&Text;a&&href',
二级:{"title":".hl-dc-title&&Text;.hl-col-xs-12&&em&&.hl-text-muted:eq(0)&&Text","img":".hl-lazy&&data-original","desc":".hl-col-xs-12&&em&&.hl-text-muted:eq(-2)&&Text;.hl-col-xs-12&&em&&.hl-text-muted:eq(1)&&Text;.hl-col-xs-12&&em&&.hl-text-muted:eq(2)&&Text","content":".hl-content-text&&Text","tabs":".hl-plays-wrap","lists":".hl-plays-list:eq(#id) li"},
// Search rule, same field order as 一级: list; title; image; description; link(s) [; content]
搜索:'.hl-list-wrap&&ul&&li;h3&&Text;.hl-lazy&&data-original;.hl-item-title:eq(0)&&Text;a&&href',
}
\ No newline at end of file
3.9.8beta2 3.9.9
\ No newline at end of file \ No newline at end of file
// Site rule for '小品网'. Uses the '*' placeholder so that recommend fields reuse the
// matching selectors from the 一级 rule (resolved by getPP in the parsers).
var rule = {
title:'小品网',
host:'http://www.xiaopin.tv',
url:'/?cate=fyclass&page=fypage',
// '**' is replaced with the search keyword by searchParse.
searchUrl:'/search.php?q=**',
searchable:2,
quickSearch:0,
filterable:0,
headers:{
'User-Agent':'MOBILE_UA'
},
timeout:5000,
// Static categories: names and ids are '&'-separated and pair up by position.
class_name:'小品&相声&二人转&春晚小品',
class_url:'2&1&3&14',
play_parse:true,
lazy:'',
limit:6,
double:true,
// double:true layout is: outer list; inner list; title; image; description; link(s).
// The two trailing '*' inherit 一级 field 3 ('.icoplay&&Text') and field 4 ('a&&href').
推荐:'.itemlist;ul&&li;.itemname&&a&&Text;.itemimg img&&src;*;*',
// First-level list rule: list; title; image; description; link(s)
一级:'.catecon&&ul&&li;.catename&&a&&Text;img&&src;.icoplay&&Text;a&&href',
二级:'*',
// A bare '*' makes search reuse the whole 一级 rule.
搜索:'*',
}
\ No newline at end of file
...@@ -11,14 +11,15 @@ var rule={ ...@@ -11,14 +11,15 @@ var rule={
'User-Agent':'MOBILE_UA', 'User-Agent':'MOBILE_UA',
// "Cookie": "searchneed=ok" // "Cookie": "searchneed=ok"
}, },
class_name:'电影&连续剧&综艺&动漫', class_parse:'nav ul li;a&&Text;a&&href;.*/(.*?)\.html',
class_url:'dianying&lianxuju&zongyi&dongman', cate_exclude:'专题',
play_parse:true, play_parse:true,
lazy:'', lazy:'',
limit:6, limit:6,
推荐:'.show;ul&&li;a&&title;img&&src;.score&&Text;a&&href', 推荐:'.show;ul&&li;*;*;*;*',
double:true, // 推荐内容是否双层定位 double:true, // 推荐内容是否双层定位
一级:'.row&&li;a&&title;img&&src;.score&&Text;a&&href', 一级:'.show&&ul&&li;a&&title;img&&src;.score&&Text;a&&href',
二级:{"title":"h1&&Text;.info&&ul&&p&&Text","img":"img&&src","desc":".info&&ul&&p:eq(-2)&&Text;.info&&ul&&p:eq(-1)&&Text;.info&&ul&&p:eq(0)&&Text;.info&&ul&&p:eq(1)&&Text;.info&&ul&&p:eq(2)&&Text;.info&&ul&&p:eq(3)&&Text","content":".text&&Text","tabs":".play&&span","lists":".playlist&&ul:eq(#id) li"}, 二级:{"title":"h1&&Text;.info&&ul&&p&&Text","img":"img&&src","desc":".info&&ul&&p:eq(-2)&&Text;.info&&ul&&p:eq(-1)&&Text;.info&&ul&&p:eq(0)&&Text;.info&&ul&&p:eq(1)&&Text;.info&&ul&&p:eq(2)&&Text;.info&&ul&&p:eq(3)&&Text","content":".text&&Text","tabs":".play&&span","lists":".playlist&&ul:eq(#id) li"},
搜索:'.row&&li;a&&title;img&&src;.score&&Text;a&&href', 搜索:'*',
// 搜索:'*;*;*;*;*',
} }
\ No newline at end of file
...@@ -1054,6 +1054,19 @@ function homeParse(homeObj) { ...@@ -1054,6 +1054,19 @@ function homeParse(homeObj) {
} }
/**
 * Resolve a single field of a recommend (推荐) or search (搜索) rule, letting '*' inherit
 * the corresponding field of the first-level (一级) rule.
 * @param p   the recommend/search rule split on ';'
 * @param pn  index of the wanted field inside p
 * @param pp  the 一级 rule split on ';'
 * @param ppn index of the 一级 field to inherit
 * @returns {*} pp[ppn] when p[pn] is '*' and pp has that index, otherwise p[pn]
 */
function getPP(p, pn, pp, ppn){
    const own = p[pn];
    if (own !== '*' || pp.length <= ppn) {
        return own;
    }
    return pp[ppn];
}
/** /**
* 首页推荐列表解析 * 首页推荐列表解析
* @param homeVodObj * @param homeVodObj
...@@ -1074,6 +1087,7 @@ function homeVodParse(homeVodObj){ ...@@ -1074,6 +1087,7 @@ function homeVodParse(homeVodObj){
return '{}' return '{}'
} }
p = p.trim(); p = p.trim();
let pp = rule.一级.split(';');
if(p.startsWith('js:')){ if(p.startsWith('js:')){
const TYPE = 'home'; const TYPE = 'home';
var input = MY_URL; var input = MY_URL;
...@@ -1087,12 +1101,13 @@ function homeVodParse(homeVodObj){ ...@@ -1087,12 +1101,13 @@ function homeVodParse(homeVodObj){
} else if (homeVodObj.double && p.length < 6) { } else if (homeVodObj.double && p.length < 6) {
return '{}' return '{}'
} }
let _ps = parseTags.getParse(p[0]); let p0 = getPP(p,0,pp,0)
let _ps = parseTags.getParse(p0);
_pdfa = _ps.pdfa; _pdfa = _ps.pdfa;
_pdfh = _ps.pdfh; _pdfh = _ps.pdfh;
_pd = _ps.pd; _pd = _ps.pd;
let is_json = p[0].startsWith('json:'); let is_json = p0.startsWith('json:');
p[0] = p[0].replace(/^(jsp:|json:|jq:)/,''); p0 = p0.replace(/^(jsp:|json:|jq:)/,'');
// print(p[0]); // print(p[0]);
let html = homeHtmlCache || getHtml(MY_URL); let html = homeHtmlCache || getHtml(MY_URL);
homeHtmlCache = undefined; homeHtmlCache = undefined;
...@@ -1103,7 +1118,7 @@ function homeVodParse(homeVodObj){ ...@@ -1103,7 +1118,7 @@ function homeVodParse(homeVodObj){
try { try {
console.log('double:' + homeVodObj.double); console.log('double:' + homeVodObj.double);
if (homeVodObj.double) { if (homeVodObj.double) {
let items = _pdfa(html, p[0]); let items = _pdfa(html, p0);
// console.log(items.length); // console.log(items.length);
for (let item of items) { for (let item of items) {
// console.log(p[1]); // console.log(p[1]);
...@@ -1111,22 +1126,36 @@ function homeVodParse(homeVodObj){ ...@@ -1111,22 +1126,36 @@ function homeVodParse(homeVodObj){
// console.log(items2.length); // console.log(items2.length);
for (let item2 of items2) { for (let item2 of items2) {
try { try {
let title = _pdfh(item2, p[2]); let p2 = getPP(p,2,pp,1);
let title = _pdfh(item2, p2);
let img = ''; let img = '';
try { try {
img = _pd(item2, p[3]) let p3 = getPP(p,3,pp,2);
} catch (e) { img = _pd(item2, p3);
} } catch (e) {}
let desc = _pdfh(item2, p[4]); let desc = '';
try {
let p4 = getPP(p,4,pp,3);
desc = _pdfh(item2, p4);
}catch (e) {}
let p5 = getPP(p,5,pp,4);
let links = []; let links = [];
for (let p5 of p[5].split('+')) { for (let _p5 of p5.split('+')) {
let link = !homeVodObj.detailUrl ? _pd(item2, p5, MY_URL) : _pdfh(item2, p5); let link = !homeVodObj.detailUrl ? _pd(item2, _p5, MY_URL) : _pdfh(item2, _p5);
links.push(link); links.push(link);
} }
let content;
if(p.length > 6 && p[6]){
let p6 = getPP(p,6,pp,5);
content = _pdfh(item2, p6);
} else{
content = '';
}
let vod = { let vod = {
vod_name: title, vod_name: title,
vod_pic: img, vod_pic: img,
vod_remarks: desc, vod_remarks: desc,
vod_content: content,
vod_id: links.join('$') vod_id: links.join('$')
}; };
// print(vod); // print(vod);
...@@ -1142,26 +1171,39 @@ function homeVodParse(homeVodObj){ ...@@ -1142,26 +1171,39 @@ function homeVodParse(homeVodObj){
} else { } else {
let items = _pdfa(html, p[0]); let items = _pdfa(html, p0);
for (let item of items) { for (let item of items) {
try { try {
let title = _pdfh(item, p[1]); let p1 = getPP(p,1,pp,1);
let title = _pdfh(item, p1);
let img = ''; let img = '';
try { try {
img = _pd(item, p[2], MY_URL); let p2 = getPP(p,2,pp,2);
} catch (e) { img = _pd(item, p2, MY_URL);
} catch (e) {}
} let desc = '';
let desc = _pdfh(item, p[3]); try {
let p3 = getPP(p,3,pp,3);
desc = _pdfh(item, p3);
}catch (e) {}
let p4 = getPP(p,4,pp,4);
let links = []; let links = [];
for (let p5 of p[4].split('+')) { for (let _p5 of p4.split('+')) {
let link = !homeVodObj.detailUrl ? _pd(item, p5, MY_URL) : _pdfh(item, p5); let link = !homeVodObj.detailUrl ? _pd(item, _p5, MY_URL) : _pdfh(item, _p5);
links.push(link); links.push(link);
} }
let content;
if(p.length > 5 && p[5]){
let p5 = getPP(p,5,pp,5);
content = _pdfh(item, p5);
}else{
content = ''
}
let vod = { let vod = {
vod_name: title, vod_name: title,
vod_pic: img, vod_pic: img,
vod_remarks: desc, vod_remarks: desc,
vod_content: content,
vod_id: links.join('$') vod_id: links.join('$')
}; };
d.push(vod); d.push(vod);
...@@ -1308,6 +1350,7 @@ function searchParse(searchObj) { ...@@ -1308,6 +1350,7 @@ function searchParse(searchObj) {
return '{}' return '{}'
} }
p = p.trim(); p = p.trim();
let pp = rule.一级.split(';');
let url = searchObj.searchUrl.replaceAll('**', searchObj.wd).replaceAll('fypage', searchObj.pg); let url = searchObj.searchUrl.replaceAll('**', searchObj.wd).replaceAll('fypage', searchObj.pg);
MY_URL = url; MY_URL = url;
console.log(MY_URL); console.log(MY_URL);
...@@ -1325,12 +1368,13 @@ function searchParse(searchObj) { ...@@ -1325,12 +1368,13 @@ function searchParse(searchObj) {
if (p.length < 5) { if (p.length < 5) {
return '{}' return '{}'
} }
let _ps = parseTags.getParse(p[0]); let p0 = getPP(p,0,pp,0);
let _ps = parseTags.getParse(p0);
_pdfa = _ps.pdfa; _pdfa = _ps.pdfa;
_pdfh = _ps.pdfh; _pdfh = _ps.pdfh;
_pd = _ps.pd; _pd = _ps.pd;
let is_json = p[0].startsWith('json:'); let is_json = p0.startsWith('json:');
p[0] = p[0].replace(/^(jsp:|json:|jq:)/,''); p0 = p0.replace(/^(jsp:|json:|jq:)/,'');
try { try {
let html = getHtml(MY_URL); let html = getHtml(MY_URL);
if (html) { if (html) {
...@@ -1352,22 +1396,30 @@ function searchParse(searchObj) { ...@@ -1352,22 +1396,30 @@ function searchParse(searchObj) {
if(is_json){ if(is_json){
html = dealJson(html); html = dealJson(html);
} }
let list = _pdfa(html, p[0]); let list = _pdfa(html, p0);
list.forEach(it => { list.forEach(it => {
let links = p[4].split('+').map(p4=>{ let p1 = getPP(p, 1, pp, 1);
return !rule.detailUrl?_pd(it, p4,MY_URL):_pdfh(it, p4) let p2 = getPP(p, 2, pp, 2);
let p3 = getPP(p, 3, pp, 3);
let p4 = getPP(p, 4, pp, 4);
let links = p4.split('+').map(_p4=>{
return !rule.detailUrl?_pd(it, _p4,MY_URL):_pdfh(it, _p4)
}); });
let link = links.join('$'); let link = links.join('$');
// Optional 6th field of the search rule: selector for the item's content text
// ('*' inherits the 6th field of 一级 through getPP).
// The element handed to this forEach callback is `it`; the previous code passed an
// undefined `item` to _pdfh, which throws a ReferenceError as soon as a 6th field is configured.
let content = '';
if (p.length > 5 && p[5]) {
    let p5 = getPP(p, 5, pp, 5);
    content = _pdfh(it, p5);
}
let ob = { let ob = {
'vod_id': link, 'vod_id': link,
'vod_name': _pdfh(it, p[1]).replace(/\n|\t/g,'').trim(), 'vod_name': _pdfh(it, p1).replace(/\n|\t/g,'').trim(),
'vod_pic': _pd(it, p[2],MY_URL), 'vod_pic': _pd(it, p2,MY_URL),
'vod_remarks': _pdfh(it, p[3]).replace(/\n|\t/g,'').trim(), 'vod_remarks': _pdfh(it, p3).replace(/\n|\t/g,'').trim(),
'vod_content': content.replace(/\n|\t/g,'').trim(),
}; };
if (p.length > 5 && p[5]) {
ob.vod_content = _pdfh(it, p[5]);
}
d.push(ob); d.push(ob);
}); });
...@@ -1375,7 +1427,6 @@ function searchParse(searchObj) { ...@@ -1375,7 +1427,6 @@ function searchParse(searchObj) {
} catch (e) { } catch (e) {
return '{}' return '{}'
} }
} }
return JSON.stringify({ return JSON.stringify({
'page': parseInt(searchObj.pg), 'page': parseInt(searchObj.pg),
......
此差异已折叠。
...@@ -49,6 +49,8 @@ ...@@ -49,6 +49,8 @@
[获取本地设备信息](https://m.jb51.net/article/140716.htm) [获取本地设备信息](https://m.jb51.net/article/140716.htm)
###### 2022/10/17 ###### 2022/10/17
- [X] 3.9.8beta2 全局关闭https对应的ssl证书验证 - [X] 3.9.8beta2 全局关闭https对应的ssl证书验证
- [X] 3.9.9 增加新特性,简写快看源,修复小品网推荐显示
- [X] 新增 推荐和搜索支持用*替代继承一级对应位置的列表,标题,图片,描述,详情,内容等定位
###### 2022/10/16 ###### 2022/10/16
- [X] 3.9.8 完成ddys选集播放和免嗅(可惜我刚弄完没播放俩个剧就被封ip了) - [X] 3.9.8 完成ddys选集播放和免嗅(可惜我刚弄完没播放俩个剧就被封ip了)
- [X] 3.9.8beta1 增加了俩自建js解析 - [X] 3.9.8beta1 增加了俩自建js解析
......
#coding=utf-8 # coding=utf-8
#!/usr/bin/python # !/usr/bin/python
import sys import sys
sys.path.append('..') sys.path.append('..')
from base.spider import Spider from base.spider import Spider
import json
import base64 import base64
from requests import session, utils
from Crypto.Cipher import AES from Crypto.Cipher import AES
class Spider(Spider): # 元类 默认的元类 type class Spider(Spider): # 元类 默认的元类 type
def getName(self): def getName(self):
return "厂长资源" return "厂长资源"
def init(self,extend=""):
print("============{0}============".format(extend))
pass
def homeContent(self,filter):
result = {}
cateManual = {
"豆瓣电影Top250": "dbtop250",
"最新电影": "zuixindianying",
"电视剧": "dsj",
"国产剧": "gcj",
"美剧": "meijutt",
"韩剧": "hanjutv",
"番剧": "fanju",
"动漫": "dm"
}
classes = []
for k in cateManual:
classes.append({
'type_name':k,
'type_id':cateManual[k]
})
result['class'] = classes
return result
def homeVideoContent(self):
rsp = self.fetch("https://czspp.com")
root = self.html(rsp.text)
aList = root.xpath("//div[@class='mi_btcon']//ul/li")
videos = []
for a in aList:
name = a.xpath('./a/img/@alt')[0]
pic = a.xpath('./a/img/@data-original')[0]
mark = a.xpath("./div[@class='hdinfo']/span/text()")[0]
sid = a.xpath("./a/@href")[0]
sid = self.regStr(sid,"/movie/(\\S+).html")
videos.append({
"vod_id":sid,
"vod_name":name,
"vod_pic":pic,
"vod_remarks":mark
})
result = {
'list':videos
}
return result
def categoryContent(self,tid,pg,filter,extend):
result = {}
url = 'https://czspp.com/{0}/page/{1}'.format(tid,pg)
rsp = self.fetch(url)
root = self.html(rsp.text)
aList = root.xpath("//div[contains(@class,'mi_cont')]//ul/li")
videos = []
for a in aList:
name = a.xpath('./a/img/@alt')[0]
pic = a.xpath('./a/img/@data-original')[0]
mark = a.xpath("./div[@class='hdinfo']/span/text()")[0]
sid = a.xpath("./a/@href")[0]
sid = self.regStr(sid,"/movie/(\\S+).html")
videos.append({
"vod_id":sid,
"vod_name":name,
"vod_pic":pic,
"vod_remarks":mark
})
result['list'] = videos def init(self, extend=""):
result['page'] = pg print("============{0}============".format(extend))
result['pagecount'] = 9999 pass
result['limit'] = 90
result['total'] = 999999
return result
def detailContent(self,array):
tid = array[0]
url = 'https://czspp.com/movie/{0}.html'.format(tid)
rsp = self.fetch(url)
root = self.html(rsp.text)
node = root.xpath("//div[@class='dyxingq']")[0]
pic = node.xpath(".//div[@class='dyimg fl']/img/@src")[0] def homeContent(self, filter):
title = node.xpath('.//h1/text()')[0] result = {}
detail = root.xpath(".//div[@class='yp_context']//p/text()")[0] cateManual = {
"豆瓣电影Top250": "dbtop250",
"最新电影": "zuixindianying",
"电视剧": "dsj",
"国产剧": "gcj",
"美剧": "meijutt",
"韩剧": "hanjutv",
"番剧": "fanju",
"动漫": "dm"
}
classes = []
for k in cateManual:
classes.append({
'type_name': k,
'type_id': cateManual[k]
})
result['class'] = classes
return result
vod = { def homeVideoContent(self):
"vod_id":tid, url = "https://czspp.com"
"vod_name":title, if len(self.cookies) <= 0:
"vod_pic":pic, self.getCookie(url)
"type_name":"", url = url + self.zid
"vod_year":"", rsp = self.fetch(url)
"vod_area":"", root = self.html(self.cleanText(rsp.text))
"vod_remarks":"", aList = root.xpath("//div[@class='mi_btcon']//ul/li")
"vod_actor":"", videos = []
"vod_director":"", for a in aList:
"vod_content":detail name = a.xpath('./a/img/@alt')[0]
} pic = a.xpath('./a/img/@data-original')[0]
mark = a.xpath("./div[@class='hdinfo']/span/text()")[0]
sid = a.xpath("./a/@href")[0]
sid = self.regStr(sid, "/movie/(\\S+).html")
videos.append({
"vod_id": sid,
"vod_name": name,
"vod_pic": pic,
"vod_remarks": mark
})
result = {
'list': videos
}
return result
infoArray = node.xpath(".//ul[@class='moviedteail_list']/li") header = {
for info in infoArray: "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"}
content = info.xpath('string(.)') cookies = ''
if content.startswith('类型'): def getCookie(self,url):
vod['type_name'] = content rsp = self.fetch(url,headers=self.header)
if content.startswith('年份'): baseurl = self.regStr(reg=r'(https://.*?/)', src=url)
vod['vod_year'] = content append = url.replace(baseurl,'')
if content.startswith('地区'): zid = self.regStr(rsp.text, "{0}(\\S+)\"".format(append))
vod['vod_area'] = content self.zid = zid
if content.startswith('豆瓣'): self.cookies = rsp.cookies
vod['vod_remarks'] = content if 'btwaf' not in zid:
if content.startswith('主演'): zid = ''
vod['vod_actor'] = content return rsp.cookies, zid
if content.startswith('导演'):
vod['vod_director'] = content
# if content.startswith('剧情'):
# vod['vod_content'] = content
vod_play_from = '$$$' def categoryContent(self, tid, pg, filter, extend):
playFrom = ['厂长'] result = {}
vod_play_from = vod_play_from.join(playFrom) url = 'https://czspp.com/{0}/page/{1}'.format(tid,pg)
if len(self.cookies) <= 0:
vod_play_url = '$$$' self.getCookie(url)
playList = [] url = url + self.zid
vodList = root.xpath("//div[@class='paly_list_btn']") rsp = self.fetch(url, cookies=self.cookies,headers=self.header)
for vl in vodList: root = self.html(self.cleanText(rsp.text))
vodItems = [] aList = root.xpath("//div[contains(@class,'bt_img mi_ne_kd mrb')]/ul/li")
aList = vl.xpath('./a') videos = []
for tA in aList: for a in aList:
href = tA.xpath('./@href')[0] name = a.xpath('./a/img/@alt')[0]
name = tA.xpath('./text()')[0] pic = a.xpath('./a/img/@data-original')[0]
tId = self.regStr(href,'/v_play/(\\S+).html') mark = a.xpath("./div[@class='hdinfo']/span/text()")[0]
vodItems.append(name + "$" + tId) sid = a.xpath("./a/@href")[0]
joinStr = '#' sid = self.regStr(sid, "/movie/(\\S+).html")
joinStr = joinStr.join(vodItems) videos.append({
playList.append(joinStr) "vod_id": sid,
vod_play_url = vod_play_url.join(playList) "vod_name": name,
"vod_pic": pic,
"vod_remarks": mark
})
result['list'] = videos
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
return result
vod['vod_play_from'] = vod_play_from def detailContent(self, array):
vod['vod_play_url'] = vod_play_url tid = array[0]
url = 'https://czspp.com/movie/{0}.html'.format(tid)
if len(self.cookies) <= 0:
self.getCookie(url)
url = url + self.zid
rsp = self.fetch(url,cookies=self.cookies,headers=self.header)
root = self.html(self.cleanText(rsp.text))
node = root.xpath("//div[@class='dyxingq']")[0]
pic = node.xpath(".//div[@class='dyimg fl']/img/@src")[0]
title = node.xpath('.//h1/text()')[0]
detail = root.xpath(".//div[@class='yp_context']//p/text()")[0]
vod = {
"vod_id": tid,
"vod_name": title,
"vod_pic": pic,
"type_name": "",
"vod_year": "",
"vod_area": "",
"vod_remarks": "",
"vod_actor": "",
"vod_director": "",
"vod_content": detail
}
infoArray = node.xpath(".//ul[@class='moviedteail_list']/li")
for info in infoArray:
content = info.xpath('string(.)')
if content.startswith('地区'):
tpyeare = ''
for inf in info:
tn = inf.text
tpyeare = tpyeare +'/'+'{0}'.format(tn)
vod['vod_area'] = tpyeare.strip('/')
if content.startswith('年份'):
vod['vod_year'] = content.replace("年份:","")
if content.startswith('主演'):
tpyeact = ''
for inf in info:
tn = inf.text
tpyeact = tpyeact +'/'+'{0}'.format(tn)
vod['vod_actor'] = tpyeact.strip('/')
if content.startswith('导演'):
tpyedire = ''
for inf in info:
tn = inf.text
tpyedire = tpyedire +'/'+'{0}'.format(tn)
vod['vod_director'] = tpyedire .strip('/')
vod_play_from = '$$$'
playFrom = ['厂长']
vod_play_from = vod_play_from.join(playFrom)
vod_play_url = '$$$'
playList = []
vodList = root.xpath("//div[@class='paly_list_btn']")
for vl in vodList:
vodItems = []
aList = vl.xpath('./a')
for tA in aList:
href = tA.xpath('./@href')[0]
name = tA.xpath('./text()')[0].replace('\xa0','')
tId = self.regStr(href, '/v_play/(\\S+).html')
vodItems.append(name + "$" + tId)
joinStr = '#'
joinStr = joinStr.join(vodItems)
playList.append(joinStr)
vod_play_url = vod_play_url.join(playList)
result = { vod['vod_play_from'] = vod_play_from
'list':[ vod['vod_play_url'] = vod_play_url
vod result = {
] 'list': [
} vod
return result ]
}
return result
def searchContent(self,key,quick): def searchContent(self, key, quick):
url = 'https://czspp.com/xssearch?q={0}'.format(key) url = 'https://czspp.com/xssearch?q={0}'.format(key)
# getHeader() if len(self.cookies) <= 0:
rsp = self.fetch(url) self.getCookie(url)
root = self.html(rsp.text) url = url + self.zid
rsp = self.fetch(url,cookies=self.cookies,headers=self.header)
root = self.html(self.cleanText(rsp.text))
vodList = root.xpath("//div[contains(@class,'mi_ne_kd')]/ul/li/a")
videos = []
for vod in vodList:
name = vod.xpath('./img/@alt')[0]
pic = vod.xpath('./img/@data-original')[0]
href = vod.xpath('./@href')[0]
tid = self.regStr(href, 'movie/(\\S+).html')
res = vod.xpath('./div[@class="jidi"]/span/text()')
if len(res) == 0:
remark = '全1集'
else:
remark = vod.xpath('./div[@class="jidi"]/span/text()')[0]
videos.append({
"vod_id": tid,
"vod_name": name,
"vod_pic": pic,
"vod_remarks": remark
})
result = {
'list': videos
}
return result
config = {
"player": {},
"filter": {}
}
header = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
}
def parseCBC(self, enc, key, iv):
keyBytes = key.encode("utf-8")
ivBytes = iv.encode("utf-8")
cipher = AES.new(keyBytes, AES.MODE_CBC, ivBytes)
msg = cipher.decrypt(enc)
paddingLen = msg[len(msg) - 1]
return msg[0:-paddingLen]
result = {} def playerContent(self, flag, id, vipFlags):
vodList = root.xpath("//div[contains(@class,'mi_ne_kd')]/ul/li/a") result = {}
videos = [] url = 'https://czspp.com/v_play/{0}.html'.format(id)
for vod in vodList: if len(self.cookies) <= 0:
name = vod.xpath('./img/@alt')[0] self.getCookie(url)
pic = vod.xpath('./img/@data-original')[0] url = url + self.zid
href = vod.xpath('./@href')[0] pat = '\\"([^\\"]+)\\";var [\\d\\w]+=function dncry.*md5.enc.Utf8.parse\\(\\"([\\d\\w]+)\\".*md5.enc.Utf8.parse\\(([\\d]+)\\)'
tid = self.regStr(href,'movie/(\\S+).html') rsp = self.fetch(url,cookies=self.cookies,headers=self.header)
remark = "" html = rsp.text
videos.append({ content = self.regStr(html, pat)
"vod_id": tid, if content == '':
"vod_name": name, str3 = url
"vod_pic": pic, pars = 1
"vod_remarks": remark header = {
}) "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
result = { }
'list':videos else:
} key = self.regStr(html, pat, 2)
return result iv = self.regStr(html, pat, 3)
decontent = self.parseCBC(base64.b64decode(content), key, iv).decode()
urlPat = 'video: \\{url: \\\"([^\\\"]+)\\\"'
vttPat = 'subtitle: \\{url:\\\"([^\\\"]+\\.vtt)\\\"'
str3 = self.regStr(decontent, urlPat)
str4 = self.regStr(decontent, vttPat)
self.loadVtt(str3)
pars = 0
header = ''
if len(str4) > 0:
result['subf'] = '/vtt/utf-8'
result['subt'] = ''
result = {
'parse': pars,
'playUrl': '',
'url': str3,
'header': header
}
return result
config = {
"player": { },
"filter": { }
}
header = {
"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
}
def parseCBC(self, enc, key, iv): def loadVtt(self, url):
keyBytes = key.encode("utf-8") pass
ivBytes = iv.encode("utf-8")
cipher = AES.new(keyBytes, AES.MODE_CBC, ivBytes)
msg = cipher.decrypt(enc)
paddingLen = msg[len(msg)-1]
return msg[0:-paddingLen]
def playerContent(self,flag,id,vipFlags): def isVideoFormat(self, url):
url = 'https://czspp.com/v_play/{0}.html'.format(id) pass
pat = '\\"([^\\"]+)\\";var [\\d\\w]+=function dncry.*md5.enc.Utf8.parse\\(\\"([\\d\\w]+)\\".*md5.enc.Utf8.parse\\(([\\d]+)\\)'
rsp = self.fetch(url)
html = rsp.text def manualVideoCheck(self):
content = self.regStr(html,pat) pass
key = self.regStr(html,pat,2)
iv = self.regStr(html,pat,3)
decontent = self.parseCBC(base64.b64decode(content),key,iv).decode()
urlPat = 'video: \\{url: \\\"([^\\\"]+)\\\"' def localProxy(self, param):
vttPat = 'subtitle: \\{url:\\\"([^\\\"]+\\.vtt)\\\"' action = {}
return [200, "video/MP2T", action, ""]
str3 = self.regStr(decontent,urlPat)
str4 = self.regStr(decontent,vttPat)
self.loadVtt(str3)
result = {
'parse':'0',
'playUrl':'',
'url':str3,
'header':''
}
if len(str4) > 0:
result['subf'] = '/vtt/utf-8'
# result['subt'] = Proxy.localProxyUrl() + "?do=czspp&url=" + URLEncoder.encode(str4)
result['subt'] = ''
return result
def loadVtt(self,url):
print(url)
def isVideoFormat(self,url):
pass
def manualVideoCheck(self):
pass
def localProxy(self,param):
action = {}
return [200, "video/MP2T", action, ""]
\ No newline at end of file
#coding=utf-8
#!/usr/bin/python
import sys
sys.path.append('..')
from base.spider import Spider
import re
import math
class Spider(Spider):
    """Spider for the 体育直播 (sports live-stream) source on m.jrskbs.com.

    NOTE(review): ``fetch``, ``html`` and ``regStr`` come from the base ``Spider``
    and are not visible here. The code below presumes ``fetch`` returns a response
    with a ``.text`` attribute, ``html`` returns a tree supporting ``.xpath`` and
    ``regStr`` returns a captured regex group as a string -- confirm against the base.
    """

    config = {
        "player": {},
        "filter": {}
    }
    header = {}

    def getName(self):
        """Return the display name of this source."""
        return "体育直播"

    def init(self, extend=""):
        """No initialisation is needed for this source."""
        pass

    def isVideoFormat(self, url):
        """Not implemented for this source (returns None)."""
        pass

    def manualVideoCheck(self):
        """Not implemented for this source (returns None)."""
        pass

    def homeContent(self, filter):
        """Return the category list, plus ``config['filter']`` when ``filter`` is truthy."""
        cateManual = {
            "全部": ""
        }
        result = {
            'class': [
                {'type_name': name, 'type_id': type_id}
                for name, type_id in cateManual.items()
            ]
        }
        if filter:
            result['filters'] = self.config['filter']
        return result

    def homeVideoContent(self):
        """This source has no home recommendations; return an empty dict."""
        return {}

    def categoryContent(self, tid, pg, filter, extend):
        """Scrape the match list from the site's front page.

        ``tid``, ``filter`` and ``extend`` are not used: the single category always
        lists every ``<a>`` under the ``contentList`` container. ``pg`` is echoed
        back as the current page.
        """
        url = 'https://m.jrskbs.com'
        rsp = self.fetch(url)
        root = self.html(rsp.text)
        anchors = root.xpath("//div[contains(@class, 'contentList')]/a")
        count = len(anchors)
        videos = []
        for a in anchors:
            href = a.xpath("./@href")[0]
            aid = self.regStr(reg=r'/live/(.*?).html', src=href)
            img = a.xpath(".//div[@class='contentLeft']/p/img/@src")[0]
            home = a.xpath(".//div[@class='contentLeft']/p[@class='false false']/text()")[0]
            away = a.xpath(".//div[@class='contentRight']/p[@class='false false']/text()")[0]
            rmList = a.xpath(".//div[@class='contentCenter']/p/text()")
            # Second centre text (stripped of '|' and spaces) goes first, then the first one.
            remark = rmList[1].replace('|', '').replace(' ', '') + '|' + rmList[0]
            videos.append({
                "vod_id": aid,
                "vod_name": home + 'vs' + away,
                "vod_pic": img,
                "vod_remarks": remark
            })
        return {
            'list': videos,
            'page': pg,
            'pagecount': math.ceil(count / 15),
            'limit': count,
            'total': count,
        }

    def detailContent(self, array):
        """Build the detail record for the match whose id is ``array[0]``.

        Play entries are emitted as ``name$url`` joined with '#'. The previous
        implementation left a trailing '#', i.e. an empty last entry.
        """
        aid = array[0]
        url = "http://m.jrskbs.com/live/{0}.html".format(aid)
        rsp = self.fetch(url)
        root = self.html(rsp.text)
        divContent = root.xpath("//div[@class='today']")[0]
        home = divContent.xpath(".//p[@class='onePlayer homeTeam']/text()")[0]
        away = divContent.xpath(".//div[3]/text()")[0].strip()
        title = home + 'vs' + away
        pic = divContent.xpath(".//img[@class='gameLogo1 homeTeam_img']/@src")[0]
        typeName = divContent.xpath(".//div/p[@class='name1 matchTime_wap']/text()")[0]
        remark = divContent.xpath(".//div/p[@class='time1 matchTitle']/text()")[0].replace(' ', '')
        vod = {
            "vod_id": aid,
            "vod_name": title,
            "vod_pic": pic,
            "type_name": typeName,
            "vod_year": "",
            "vod_area": "",
            "vod_remarks": remark,
            "vod_actor": '',
            "vod_director": '',
            "vod_content": ''
        }
        entries = []
        for link in root.xpath("//div[@class='liveshow']/a"):
            name = link.xpath("./text()")[0]
            purl = link.xpath("./@data-url")[0]
            entries.append('{0}${1}'.format(name, purl))
        vod['vod_play_from'] = '体育直播'
        vod['vod_play_url'] = '#'.join(entries)
        return {
            'list': [
                vod
            ]
        }

    def searchContent(self, key, quick):
        """Search is not supported by this source; return an empty dict."""
        return {}

    def _decode04stream(self, html):
        """Extract the play URL hidden in the obfuscated ``eval(...)`` calls of a 04stream page.

        The second ``eval`` argument decodes (via ``l(<n>)`` terms, each taken modulo
        256) to a script containing ``/<a>/<b>/``; the third ``eval`` argument holds
        ``O(<n>)`` terms that are divided by ``a`` and ``b`` before being mapped to
        characters. The decoded text is then searched for ``play_url='...'``.
        """
        evalArgs = re.findall(r"eval\((.*?)\);", html)
        script = ''
        for piece in evalArgs[1].split('+'):
            # NOTE(review): as written the suffix is always '' (the text after a
            # trailing ')' is empty); kept as-is to preserve the existing decoding.
            if piece.endswith(')'):
                suffix = piece.split(')')[-1]
            else:
                suffix = ''
            code = int(self.regStr(reg=r'l\((.*?)\)', src=piece))
            script = script + chr(code % 256) + suffix
        divisors = self.regStr(reg=r'/(.*)/', src=script).replace(')', '').split('/')
        decoded = ''
        for piece in evalArgs[2].split('+'):
            if piece.startswith('O'):
                code = int(int(self.regStr(reg=r'O\((.*?)\)', src=piece)) / int(divisors[0]) / int(divisors[1]))
                decoded = decoded + chr(code % 256)
        return self.regStr(reg=r"play_url=\'(.*?)\'", src=decoded)

    def playerContent(self, flag, id, vipFlags):
        """Resolve a play link (``id``) into the dict the player expects.

        * '04stream' links are decoded to a direct URL (``parse`` 0).
        * 'dplayer' links are fetched relative to the site and the ``PlayUrl``
          variable is extracted (``parse`` 0).
        * 'v.stnye.cc' links are returned unchanged with ``parse`` 1.
        * Any other link is also returned unchanged with ``parse`` 1; previously
          this case raised UnboundLocalError because ``purl`` was never assigned.
        """
        result = {}
        url = id
        # Default covers 'v.stnye.cc' and every unrecognised link.
        purl = id
        result["parse"] = 1
        if '04stream' in url:
            rsp = self.fetch(url)
            purl = self._decode04stream(rsp.text)
            result["parse"] = 0
        elif 'v.stnye.cc' in url:
            pass
        elif 'dplayer' in url:
            url = 'https://m.jrskbs.com' + url
            rsp = self.fetch(url)
            purl = self.regStr(reg=r'var PlayUrl = \"(.*?)\"', src=rsp.text)
            result["parse"] = 0
        result["playUrl"] = ''
        result["url"] = purl
        result["header"] = ''
        return result

    def localProxy(self, param):
        """Return a fixed local-proxy response with an empty action; ``param`` is unused."""
        action = {
            'url': '',
            'header': '',
            'param': '',
            'type': 'string',
            'after': ''
        }
        return [200, "video/MP2T", action, ""]
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册