Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2201_75808940
dr_py
提交
ec2cfa5b
dr_py
项目概览
2201_75808940
/
dr_py
与 Fork 源项目一致
Fork自
晚风拂柳颜 / dr_py
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
dr_py
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ec2cfa5b
编写于
8月 26, 2022
作者:
H
hjdhnx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
555影视增加了首页源和限制条数写法
上级
c4ca5a47
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
136 addition
and
53 deletion
+136
-53
app.py
app.py
+9
-5
js/555影视.js
js/555影视.js
+3
-0
models/cms.py
models/cms.py
+123
-47
readme.md
readme.md
+1
-1
未找到文件。
app.py
浏览文件 @
ec2cfa5b
...
...
@@ -19,7 +19,7 @@ from utils.web import *
rule_list
=
getRules
()
print
(
rule_list
)
def
getParmas
(
key
=
None
):
def
getParmas
(
key
=
None
,
value
=
''
):
"""
获取链接参数
:param key:
...
...
@@ -31,7 +31,7 @@ def getParmas(key=None):
elif
request
.
method
==
'GET'
:
args
=
request
.
args
if
key
:
return
args
.
get
(
key
,
''
)
return
args
.
get
(
key
,
value
)
else
:
return
args
...
...
@@ -66,7 +66,8 @@ def vod():
flag
=
getParmas
(
'flag'
)
filter
=
getParmas
(
'filter'
)
t
=
getParmas
(
't'
)
pg
=
getParmas
(
'pg'
)
pg
=
getParmas
(
'pg'
,
'1'
)
pg
=
int
(
pg
)
ids
=
getParmas
(
'ids'
)
q
=
getParmas
(
'q'
)
...
...
@@ -75,7 +76,10 @@ def vod():
# print(data)
return
jsonify
(
data
)
if
ac
and
ids
:
# 二级
data
=
cms
.
detailContent
(
ids
.
split
(
','
))
id_list
=
ids
.
split
(
','
)
# print(len(id_list))
# print(id_list)
data
=
cms
.
detailContent
(
pg
,
id_list
)
# print(data)
return
jsonify
(
data
)
if
wd
:
# 搜索
...
...
@@ -84,7 +88,7 @@ def vod():
return
jsonify
(
data
)
# return jsonify({'rule':rule,'js_code':js_code})
home_data
=
cms
.
homeContent
()
home_data
=
cms
.
homeContent
(
pg
)
return
jsonify
(
home_data
)
@
app
.
route
(
'/clear'
)
...
...
js/555影视.js
浏览文件 @
ec2cfa5b
...
...
@@ -10,6 +10,9 @@ var rule = {
},
class_name
:
'
电影&连续剧&福利&动漫&综艺
'
,
class_url
:
'
1&2&124&4&3
'
,
limit
:
10
,
推荐
:
'
.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href
'
,
double
:
true
,
// 推荐内容是否双层定位
一级
:
'
body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href
'
,
二级
:{
"
title
"
:
"
h1&&Text;.module-info-tag&&Text
"
,
"
img
"
:
"
.lazyload&&data-original
"
,
"
desc
"
:
"
.module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text
"
,
"
content
"
:
"
.module-info-introduction&&Text
"
,
"
tabs
"
:
"
.module-tab-item
"
,
"
lists
"
:
"
.module-play-list:eq(#id) a
"
},
搜索
:
'
body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text
'
,
...
...
models/cms.py
浏览文件 @
ec2cfa5b
...
...
@@ -5,20 +5,24 @@
# Date : 2022/8/25
import
requests
import
re
import
math
from
utils.web
import
*
from
utils.config
import
config
from
utils.htmlParser
import
jsoup
from
urllib.parse
import
urljoin
from
concurrent.futures
import
ThreadPoolExecutor
# 引入线程池
class
CMS
:
def
__init__
(
self
,
rule
):
host
=
rule
.
get
(
'host'
,
''
).
rstrip
(
'/'
)
timeout
=
rule
.
get
(
'timeout'
,
2
000
)
timeout
=
rule
.
get
(
'timeout'
,
5
000
)
homeUrl
=
rule
.
get
(
'homeUrl'
,
'/'
)
url
=
rule
.
get
(
'url'
,
''
)
detailUrl
=
rule
.
get
(
'detailUrl'
,
''
)
searchUrl
=
rule
.
get
(
'searchUrl'
,
''
)
headers
=
rule
.
get
(
'headers'
,{})
limit
=
rule
.
get
(
'limit'
,
6
)
self
.
limit
=
min
(
limit
,
20
)
keys
=
headers
.
keys
()
for
k
in
headers
.
keys
():
if
str
(
k
).
lower
()
==
'user-agent'
:
...
...
@@ -45,9 +49,11 @@ class CMS:
self
.
class_name
=
rule
.
get
(
'class_name'
,
''
)
self
.
class_url
=
rule
.
get
(
'class_url'
,
''
)
self
.
class_parse
=
rule
.
get
(
'class_parse'
,
''
)
self
.
double
=
rule
.
get
(
'double'
,
False
)
self
.
一级
=
rule
.
get
(
'一级'
,
''
)
self
.
二级
=
rule
.
get
(
'二级'
,
''
)
self
.
搜索
=
rule
.
get
(
'搜索'
,
''
)
self
.
推荐
=
rule
.
get
(
'推荐'
,
''
)
self
.
title
=
rule
.
get
(
'title'
,
''
)
self
.
timeout
=
round
(
int
(
timeout
)
/
1000
,
2
)
self
.
filter
=
rule
.
get
(
'filter'
,[])
...
...
@@ -100,11 +106,12 @@ class CMS:
pq
=
jsp
.
pq
return
pdfh
,
pdfa
,
pd
,
pq
def
homeContent
(
self
):
def
homeContent
(
self
,
fypage
=
1
):
# yanaifei
# https://yanetflix.com/vodtype/dianying.html
result
=
{}
classes
=
[]
video_result
=
self
.
blank
()
if
self
.
class_url
and
self
.
class_name
:
class_names
=
self
.
class_name
.
split
(
'&'
)
...
...
@@ -116,41 +123,104 @@ class CMS:
'type_id'
:
class_urls
[
i
]
})
# print(self.url)
if
self
.
homeUrl
.
startswith
(
'http'
)
and
self
.
class_parse
:
if
self
.
homeUrl
.
startswith
(
'http'
):
# print(self.homeUrl)
# print(self.class_parse)
try
:
r
=
requests
.
get
(
self
.
homeUrl
,
headers
=
self
.
headers
,
timeout
=
self
.
timeout
)
r
.
encoding
=
r
.
apparent_encoding
html
=
r
.
text
p
=
self
.
class_parse
.
split
(
';'
)
jsp
=
jsoup
(
self
.
url
)
pdfh
=
jsp
.
pdfh
pdfa
=
jsp
.
pdfa
pd
=
jsp
.
pd
items
=
pdfa
(
html
,
p
[
0
])
for
item
in
items
:
title
=
pdfh
(
item
,
p
[
1
])
url
=
pd
(
item
,
p
[
2
])
tag
=
url
if
len
(
p
)
>
3
and
p
[
3
].
strip
():
tag
=
self
.
regexp
(
p
[
3
].
strip
(),
url
,
0
)
classes
.
append
({
'type_name'
:
title
,
'type_id'
:
tag
})
if
self
.
class_parse
:
p
=
self
.
class_parse
.
split
(
';'
)
jsp
=
jsoup
(
self
.
url
)
pdfh
=
jsp
.
pdfh
pdfa
=
jsp
.
pdfa
pd
=
jsp
.
pd
items
=
pdfa
(
html
,
p
[
0
])
for
item
in
items
:
title
=
pdfh
(
item
,
p
[
1
])
url
=
pd
(
item
,
p
[
2
])
tag
=
url
if
len
(
p
)
>
3
and
p
[
3
].
strip
():
tag
=
self
.
regexp
(
p
[
3
].
strip
(),
url
,
0
)
classes
.
append
({
'type_name'
:
title
,
'type_id'
:
tag
})
video_result
=
self
.
homeVideoContent
(
html
,
fypage
)
except
Exception
as
e
:
print
(
e
)
result
[
'class'
]
=
classes
if
self
.
filter
:
result
[
'filters'
]
=
config
[
'filter'
]
result
.
update
(
video_result
)
return
result
def homeVideoContent(self):
    """Return the empty home-page payload: a dict whose 'list' entry is an empty list."""
    return {'list': []}
def homeVideoContent(self, html, fypage=1):
    """
    Extract the home-page recommendation list from already-fetched markup.

    The ``推荐`` rule is a ';'-separated selector string. In single mode its
    parts are: list; title; image; remarks; link; [content]. In double mode
    (``self.double`` truthy) an extra inner-list selector sits at index 1 and
    every later part shifts right by one.

    :param html: markup handed to the jsoup helpers
    :param fypage: page number echoed back in the ``page`` field
    :return: dict with list/code/msg/page/pagecount/limit/total, or
             ``self.blank()`` when the rule is missing, has too few parts,
             or extraction raises
    """
    rule_text = self.推荐
    if not rule_text:
        return self.blank()

    p = rule_text.split(';')
    # Double mode needs one more mandatory selector than single mode.
    min_parts = 6 if self.double else 5
    if len(p) < min_parts:
        return self.blank()

    parser = jsoup(self.homeUrl)
    text_of, nodes_of, url_of = parser.pdfh, parser.pdfa, parser.pd
    videos = []
    try:
        if self.double:
            first = 2
            # Lazy generator: inner lists are resolved one outer node at a
            # time, interleaved with the per-card extraction below.
            cards = (
                card
                for group in nodes_of(html, p[0])
                for card in nodes_of(group, p[1])
            )
        else:
            first = 1
            cards = nodes_of(html, p[0])

        for card in cards:
            title = text_of(card, p[first])
            img = url_of(card, p[first + 1])
            desc = text_of(card, p[first + 2])
            link = url_of(card, p[first + 3])
            # The trailing content selector is optional.
            if len(p) < first + 5:
                content = ''
            else:
                content = text_of(card, p[first + 4])
            videos.append({
                "vod_id": link,
                "vod_name": title,
                "vod_pic": img,
                "vod_remarks": desc,
                "vod_content": content,
                "type_id": 1,
                "type_name": "首页推荐",
            })

        total = len(videos)
        return {
            'list': videos,
            'code': 1,
            'msg': '数据列表',
            'page': fypage,
            'pagecount': math.ceil(total / self.limit),
            'limit': self.limit,
            'total': total,
        }
    except Exception as e:
        print(f'首页内容获取失败:{e}')
        return self.blank()
def
categoryContent
(
self
,
fyclass
,
fypage
):
"""
...
...
@@ -175,6 +245,7 @@ class CMS:
if
fypage
==
1
and
self
.
test
(
'[\[\]]'
,
url
):
url
=
url
.
split
(
'['
)[
1
].
split
(
']'
)[
0
]
r
=
requests
.
get
(
url
,
headers
=
self
.
headers
,
timeout
=
self
.
timeout
)
r
.
encoding
=
r
.
apparent_encoding
print
(
r
.
url
)
p
=
self
.
一级
.
split
(
';'
)
# 解析
if
len
(
p
)
<
5
:
...
...
@@ -207,25 +278,20 @@ class CMS:
result
[
'list'
]
=
videos
result
[
'page'
]
=
fypage
result
[
'pagecount'
]
=
9999
result
[
'limit'
]
=
9
0
result
[
'limit'
]
=
9
999
result
[
'total'
]
=
999999
return
result
def
detailContent
(
self
,
array
):
"""
cms二级数据
:param array:
:return:
"""
# video-info-header
detailUrl
=
str
(
array
[
0
])
print
(
detailUrl
)
def
detailOneVod
(
self
,
id
):
detailUrl
=
str
(
id
)
vod
=
{}
if
not
detailUrl
.
startswith
(
'http'
):
url
=
self
.
detailUrl
.
replace
(
'fyid'
,
detailUrl
)
else
:
url
=
detailUrl
print
(
url
)
#
print(url)
r
=
requests
.
get
(
url
,
headers
=
self
.
headers
,
timeout
=
self
.
timeout
)
r
.
encoding
=
r
.
apparent_encoding
html
=
r
.
text
# print(html)
p
=
self
.
二级
# 解析
...
...
@@ -236,15 +302,10 @@ class CMS:
vod
[
'vod_actor'
]
=
'没有二级,只有一级链接直接嗅探播放'
vod
[
'content'
]
=
detailUrl
vod
[
'vod_play_url'
]
=
'嗅探播放$'
+
detailUrl
result
=
{
'list'
:
[
vod
]
}
return
result
return
vod
if
not
isinstance
(
p
,
dict
):
return
self
.
blank
()
return
vod
jsp
=
jsoup
(
self
.
url
)
pdfh
=
jsp
.
pdfh
...
...
@@ -313,10 +374,24 @@ class CMS:
vod
[
'vod_play_from'
]
=
vod_play_from
vod
[
'vod_play_url'
]
=
vod_play_url
return
vod
def detailContent(self, fypage, array):
    """
    Fetch second-level (detail) data for one page of ids concurrently.

    The id list is cut into pages of ``self.limit`` entries; only the page
    selected by ``fypage`` is fetched, one ``self.detailOneVod`` call per id
    on a thread pool.

    :param fypage: 1-based page number; values below 1 are treated as 1
    :param array: full list of detail ids / urls
    :return: ``{'list': [...]}`` with one ``detailOneVod`` result per id of
             the requested page, in input order; the list is empty when the
             page contains no ids
    :raises Exception: whatever ``detailOneVod`` raises is propagated
    """
    page = max(fypage, 1)
    start = (page - 1) * self.limit
    # Slicing already clamps at the end of the list, no explicit min() needed.
    page_ids = array[start:start + self.limit]

    # ThreadPoolExecutor rejects max_workers <= 0 with ValueError, so an empty
    # page (no ids, or a page number past the end) must short-circuit here.
    if not page_ids:
        return {'list': []}

    # The context manager waits for all workers and always releases the pool.
    with ThreadPoolExecutor(max_workers=min(self.limit, len(page_ids))) as pool:
        vod_list = list(pool.map(self.detailOneVod, page_ids))
    return {'list': vod_list}
...
...
@@ -327,6 +402,7 @@ class CMS:
url
=
self
.
searchUrl
.
replace
(
'**'
,
key
).
replace
(
'fypage'
,
pg
)
print
(
url
)
r
=
requests
.
get
(
url
,
headers
=
self
.
headers
)
r
.
encoding
=
r
.
apparent_encoding
html
=
r
.
text
if
not
self
.
搜索
:
return
self
.
blank
()
...
...
@@ -364,13 +440,13 @@ class CMS:
if
__name__
==
'__main__'
:
from
utils
import
parser
# js_path = f'js/玩偶姐姐.js'
js_path
=
f
'js/
蓝莓
影视.js'
js_path
=
f
'js/
555
影视.js'
ctx
,
js_code
=
parser
.
runJs
(
js_path
)
rule
=
ctx
.
eval
(
'rule'
)
cms
=
CMS
(
rule
)
print
(
cms
.
title
)
print
(
cms
.
homeContent
())
print
(
cms
.
categoryContent
(
'20'
,
1
))
#
print(cms.categoryContent('20',1))
# print(cms.categoryContent('latest',1))
# print(cms.detailContent(['https://hongkongdollvideo.com/video/b22c7cb6df40a3c4.html']))
# cms.categoryContent('dianying',1)
...
...
readme.md
浏览文件 @
ec2cfa5b
...
...
@@ -29,7 +29,7 @@ var rule = {
'
User-Agent
'
:
'
MOBILE_UA
'
,
"
Cookie
"
:
"
searchneed=ok
"
},
timeout
:
5000
,
//网站的全局请求超时,默认是
2
000毫秒
timeout
:
5000
,
//网站的全局请求超时,默认是5000毫秒
//动态分类获取 列表;标题;链接;正则提取 不需要正则的时候后面别加分号
class_parse
:
'
#side-menu:lt(1) li;a&&Text;a&&href;com/(.*?)/
'
,
// 类似海阔一级 列表;标题;图片;描述;链接;详情 其中最后一个参数选填
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录