Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
乜汻、
dr_py
提交
18652822
dr_py
项目概览
乜汻、
/
dr_py
与 Fork 源项目一致
Fork自
晚风拂柳颜 / dr_py
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
dr_py
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
18652822
编写于
8月 26, 2022
作者:
H
hjdhnx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
支持没有二级的规则,新增玩偶姐姐
上级
66200b2d
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
166 addition
and
46 deletion
+166
-46
js/玩偶姐姐.js
js/玩偶姐姐.js
+8
-7
models/cms.py
models/cms.py
+154
-38
readme.md
readme.md
+4
-1
未找到文件。
js/玩偶姐姐.js
浏览文件 @
18652822
...
@@ -2,13 +2,13 @@ var rule = {
...
@@ -2,13 +2,13 @@ var rule = {
title
:
'
玩偶姐姐
'
,
title
:
'
玩偶姐姐
'
,
host
:
'
https://hongkongdollvideo.com
'
,
host
:
'
https://hongkongdollvideo.com
'
,
homeUrl
:
'
/latest/
'
,
homeUrl
:
'
/latest/
'
,
url
:
'
/fyclass/fypage.html[/fypage/]
'
,
url
:
'
/fyclass/fypage.html[/fyclass/]
'
,
headers
:{
'
User-Agent
'
:
'
MOBILE_UA
'
},
timeout
:
5000
,
searchUrl
:
'
/vodsearch/**----------fypage---.html
'
,
searchUrl
:
'
/vodsearch/**----------fypage---.html
'
,
ua
:
'
MOBILE_UA
'
,
class_parse
:
'
#side-menu:lt(1) li;a&&Text;a&&href;com/(.*?)/
'
,
class_name
:
'
最新&Hongkong Doll&麻豆传媒&91制片厂&天美传媒&蜜桃传媒&皇家华人&星空传媒&精东影业&乐播传媒&成人头条&乌鸦传媒&兔子先生&杏吧原创&mini传媒&大象传媒&开心鬼传媒&PsychoPorn&糖心Vlog
'
,
class_url
:
'
latest&Hongkong Doll&麻豆传媒&91制片厂&天美传媒&蜜桃传媒&皇家华人&星空传媒&精东影业&乐播传媒&成人头条&乌鸦传媒&兔子先生&杏吧原创&mini传媒&大象传媒&开心鬼传媒&PsychoPorn&糖心Vlog
'
,
class_parse
:
'
body&&#side-menu:not(:has(.menu-icons))&&li;
'
,
一级
:
'
.col-sm-6;h3&&Text;img&&data-src;.date&&Text;a&&href
'
,
一级
:
'
.col-sm-6;h3&&Text;img&&data-src;.date&&Text;a&&href
'
,
二级
:{
"
title
"
:
"
h1&&Text;.module-info-tag&&Text
"
,
"
img
"
:
"
.lazyload&&data-original
"
,
"
desc
"
:
"
.module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text
"
,
"
content
"
:
"
.module-info-introduction&&Text
"
,
"
tabs
"
:
"
.module-tab-item
"
,
"
lists
"
:
"
.module-play-list:eq(#id) a
"
},
二级
:
'
*
'
,
搜索
:
'
body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text
'
,
}
}
\ No newline at end of file
models/cms.py
浏览文件 @
18652822
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/25
# Date : 2022/8/25
import
requests
import
requests
import
re
from
utils.web
import
*
from
utils.web
import
*
from
utils.config
import
config
from
utils.config
import
config
from
utils.htmlParser
import
jsoup
from
utils.htmlParser
import
jsoup
...
@@ -12,19 +12,36 @@ from urllib.parse import urljoin
...
@@ -12,19 +12,36 @@ from urllib.parse import urljoin
class
CMS
:
class
CMS
:
def
__init__
(
self
,
rule
):
def
__init__
(
self
,
rule
):
self
.
host
=
rule
.
get
(
'host'
,
''
).
rstrip
(
'/'
)
host
=
rule
.
get
(
'host'
,
''
).
rstrip
(
'/'
)
self
.
homeUrl
=
rule
.
get
(
'homeUrl'
,
''
)
timeout
=
rule
.
get
(
'timeout'
,
2000
)
self
.
url
=
rule
.
get
(
'url'
,
''
).
rstrip
(
'/'
)
homeUrl
=
rule
.
get
(
'homeUrl'
,
''
)
self
.
detailUrl
=
rule
.
get
(
'detailUrl'
,
''
).
rstrip
(
'/'
)
url
=
rule
.
get
(
'url'
,
''
)
self
.
searchUrl
=
rule
.
get
(
'searchUrl'
,
''
)
detailUrl
=
rule
.
get
(
'detailUrl'
,
''
)
ua
=
rule
.
get
(
'ua'
,
''
)
searchUrl
=
rule
.
get
(
'searchUrl'
,
''
)
if
ua
==
'MOBILE_UA'
:
headers
=
rule
.
get
(
'headers'
,{})
self
.
ua
=
MOBILE_UA
keys
=
headers
.
keys
()
elif
ua
==
'PC_UA'
:
for
k
in
headers
.
keys
():
self
.
ua
=
PC_UA
if
str
(
k
).
lower
()
==
'user-agent'
:
v
=
headers
[
k
]
if
v
==
'MOBILE_UA'
:
headers
[
k
]
=
MOBILE_UA
elif
v
==
'PC_UA'
:
headers
[
k
]
=
PC_UA
lower_keys
=
list
(
map
(
lambda
x
:
x
.
lower
(),
keys
))
if
not
'user-agent'
in
lower_keys
:
headers
[
'User-Agent'
]
=
UA
self
.
headers
=
headers
self
.
host
=
host
self
.
homeUrl
=
urljoin
(
host
,
homeUrl
)
if
host
and
homeUrl
else
homeUrl
if
url
.
find
(
'['
)
>-
1
and
url
.
find
(
']'
)
>
-
1
:
u1
=
url
.
split
(
'['
)[
0
]
u2
=
url
.
split
(
'['
)[
1
].
split
(
']'
)[
0
]
self
.
url
=
urljoin
(
host
,
u1
)
+
'['
+
urljoin
(
host
,
u2
)
+
']'
if
host
and
url
else
url
else
:
else
:
self
.
ua
=
UA
self
.
url
=
urljoin
(
host
,
url
)
if
host
and
url
else
url
self
.
searchUrl
=
rule
.
get
(
'searchUrl'
,
''
)
self
.
detailUrl
=
urljoin
(
host
,
detailUrl
)
if
host
and
detailUrl
else
detailUrl
self
.
searchUrl
=
urljoin
(
host
,
searchUrl
)
if
host
and
searchUrl
else
searchUrl
self
.
class_name
=
rule
.
get
(
'class_name'
,
''
)
self
.
class_name
=
rule
.
get
(
'class_name'
,
''
)
self
.
class_url
=
rule
.
get
(
'class_url'
,
''
)
self
.
class_url
=
rule
.
get
(
'class_url'
,
''
)
self
.
class_parse
=
rule
.
get
(
'class_parse'
,
''
)
self
.
class_parse
=
rule
.
get
(
'class_parse'
,
''
)
...
@@ -32,26 +49,98 @@ class CMS:
...
@@ -32,26 +49,98 @@ class CMS:
self
.
二级
=
rule
.
get
(
'二级'
,
''
)
self
.
二级
=
rule
.
get
(
'二级'
,
''
)
self
.
搜索
=
rule
.
get
(
'搜索'
,
''
)
self
.
搜索
=
rule
.
get
(
'搜索'
,
''
)
self
.
title
=
rule
.
get
(
'title'
,
''
)
self
.
title
=
rule
.
get
(
'title'
,
''
)
self
.
timeout
=
round
(
int
(
timeout
)
/
1000
,
2
)
self
.
filter
=
rule
.
get
(
'filter'
,[])
self
.
filter
=
rule
.
get
(
'filter'
,[])
self
.
extend
=
rule
.
get
(
'extend'
,[])
self
.
extend
=
rule
.
get
(
'extend'
,[])
def
getName
(
self
):
def
getName
(
self
):
return
self
.
title
return
self
.
title
def
regexp
(
self
,
prule
,
text
,
pos
=
None
):
ret
=
re
.
search
(
prule
,
text
).
groups
()
if
pos
!=
None
and
isinstance
(
pos
,
int
):
return
ret
[
pos
]
else
:
return
ret
def
test
(
self
,
text
,
string
):
searchObj
=
re
.
search
(
rf
'
{
text
}
'
,
string
,
re
.
M
|
re
.
I
)
# print(searchObj)
# global vflag
if
searchObj
:
# vflag = searchObj.group()
pass
return
searchObj
def
blank
(
self
):
result
=
{
'list'
:
[]
}
return
result
def
blank_vod
(
self
):
return
{
"vod_id"
:
""
,
"vod_name"
:
""
,
"vod_pic"
:
""
,
"type_name"
:
""
,
"vod_year"
:
""
,
"vod_area"
:
""
,
"vod_remarks"
:
""
,
"vod_actor"
:
""
,
"vod_director"
:
""
,
"vod_content"
:
""
}
def
jsoup
(
self
):
jsp
=
jsoup
(
self
.
url
)
pdfh
=
jsp
.
pdfh
pdfa
=
jsp
.
pdfa
pd
=
jsp
.
pd
pq
=
jsp
.
pq
return
pdfh
,
pdfa
,
pd
,
pq
def
homeContent
(
self
):
def
homeContent
(
self
):
# yanaifei
# yanaifei
# https://yanetflix.com/vodtype/dianying.html
# https://yanetflix.com/vodtype/dianying.html
result
=
{}
result
=
{}
classes
=
[]
if
self
.
class_url
and
self
.
class_name
:
class_names
=
self
.
class_name
.
split
(
'&'
)
class_names
=
self
.
class_name
.
split
(
'&'
)
class_urls
=
self
.
class_url
.
split
(
'&'
)
class_urls
=
self
.
class_url
.
split
(
'&'
)
cnt
=
min
(
len
(
class_urls
),
len
(
class_names
))
cnt
=
min
(
len
(
class_urls
),
len
(
class_names
))
classes
=
[]
for
i
in
range
(
cnt
):
for
i
in
range
(
cnt
):
classes
.
append
({
classes
.
append
({
'type_name'
:
class_names
[
i
],
'type_name'
:
class_names
[
i
],
'type_id'
:
class_urls
[
i
]
'type_id'
:
class_urls
[
i
]
})
})
print
(
self
.
url
)
# print(self.url)
if
self
.
homeUrl
and
self
.
class_parse
:
# print(self.homeUrl)
# print(self.class_parse)
try
:
r
=
requests
.
get
(
self
.
homeUrl
,
headers
=
self
.
headers
,
timeout
=
self
.
timeout
)
html
=
r
.
text
p
=
self
.
class_parse
.
split
(
';'
)
jsp
=
jsoup
(
self
.
url
)
pdfh
=
jsp
.
pdfh
pdfa
=
jsp
.
pdfa
pd
=
jsp
.
pd
items
=
pdfa
(
html
,
p
[
0
])
for
item
in
items
:
title
=
pdfh
(
item
,
p
[
1
])
url
=
pd
(
item
,
p
[
2
])
tag
=
url
if
len
(
p
)
>
3
and
p
[
3
].
strip
():
tag
=
self
.
regexp
(
p
[
3
].
strip
(),
url
,
0
)
classes
.
append
({
'type_name'
:
title
,
'type_id'
:
tag
})
except
Exception
as
e
:
print
(
e
)
result
[
'class'
]
=
classes
result
[
'class'
]
=
classes
if
self
.
filter
:
if
self
.
filter
:
result
[
'filters'
]
=
config
[
'filter'
]
result
[
'filters'
]
=
config
[
'filter'
]
...
@@ -83,10 +172,14 @@ class CMS:
...
@@ -83,10 +172,14 @@ class CMS:
# url = self.url + '/{0}.html'.format(params)
# url = self.url + '/{0}.html'.format(params)
pg
=
str
(
fypage
)
pg
=
str
(
fypage
)
url
=
self
.
url
.
replace
(
'fyclass'
,
fyclass
).
replace
(
'fypage'
,
pg
)
url
=
self
.
url
.
replace
(
'fyclass'
,
fyclass
).
replace
(
'fypage'
,
pg
)
print
(
url
)
if
fypage
==
1
and
self
.
test
(
'[\[\]]'
,
url
):
headers
=
{
'user-agent'
:
self
.
ua
}
url
=
url
.
split
(
'['
)[
1
].
split
(
']'
)[
0
]
r
=
requests
.
get
(
url
,
headers
=
headers
)
r
=
requests
.
get
(
url
,
headers
=
self
.
headers
,
timeout
=
self
.
timeout
)
print
(
r
.
url
)
p
=
self
.
一级
.
split
(
';'
)
# 解析
p
=
self
.
一级
.
split
(
';'
)
# 解析
if
len
(
p
)
<
5
:
return
self
.
blank
()
jsp
=
jsoup
(
self
.
url
)
jsp
=
jsoup
(
self
.
url
)
pdfh
=
jsp
.
pdfh
pdfh
=
jsp
.
pdfh
pdfa
=
jsp
.
pdfa
pdfa
=
jsp
.
pdfa
...
@@ -125,17 +218,34 @@ class CMS:
...
@@ -125,17 +218,34 @@ class CMS:
:return:
:return:
"""
"""
# video-info-header
# video-info-header
fyid
=
str
(
array
[
0
])
detailUrl
=
str
(
array
[
0
])
if
not
fyid
.
startswith
(
'http'
):
print
(
detailUrl
)
url
=
self
.
detailUrl
.
replace
(
'fyid'
,
fyid
)
if
not
detailUrl
.
startswith
(
'http'
):
url
=
self
.
detailUrl
.
replace
(
'fyid'
,
detailUrl
)
else
:
else
:
url
=
fyid
url
=
detailUrl
print
(
url
)
print
(
url
)
headers
=
{
'user-agent'
:
self
.
ua
}
r
=
requests
.
get
(
url
,
headers
=
self
.
headers
,
timeout
=
self
.
timeout
)
r
=
requests
.
get
(
url
,
headers
=
headers
)
html
=
r
.
text
html
=
r
.
text
# print(html)
# print(html)
p
=
self
.
二级
# 解析
p
=
self
.
二级
# 解析
if
p
==
'*'
:
vod
=
self
.
blank_vod
()
vod
[
'vod_play_from'
]
=
'道长在线'
vod
[
'desc'
]
=
detailUrl
vod
[
'vod_actor'
]
=
'没有二级,只有一级链接直接嗅探播放'
vod
[
'content'
]
=
detailUrl
vod
[
'vod_play_url'
]
=
'嗅探播放$'
+
detailUrl
result
=
{
'list'
:
[
vod
]
}
return
result
if
not
isinstance
(
p
,
dict
):
return
self
.
blank
()
jsp
=
jsoup
(
self
.
url
)
jsp
=
jsoup
(
self
.
url
)
pdfh
=
jsp
.
pdfh
pdfh
=
jsp
.
pdfh
pdfa
=
jsp
.
pdfa
pdfa
=
jsp
.
pdfa
...
@@ -165,7 +275,7 @@ class CMS:
...
@@ -165,7 +275,7 @@ class CMS:
obj
[
'img'
]
=
img
obj
[
'img'
]
=
img
vod
=
{
vod
=
{
"vod_id"
:
fyid
,
"vod_id"
:
detailUrl
,
"vod_name"
:
vod_name
,
"vod_name"
:
vod_name
,
"vod_pic"
:
obj
.
get
(
'img'
,
''
),
"vod_pic"
:
obj
.
get
(
'img'
,
''
),
"type_name"
:
obj
.
get
(
'title'
,
''
),
"type_name"
:
obj
.
get
(
'title'
,
''
),
...
@@ -212,14 +322,18 @@ class CMS:
...
@@ -212,14 +322,18 @@ class CMS:
def
searchContent
(
self
,
key
,
fypage
=
1
):
def
searchContent
(
self
,
key
,
fypage
=
1
):
pg
=
str
(
fypage
)
pg
=
str
(
fypage
)
if
not
self
.
searchUrl
:
return
self
.
blank
()
url
=
self
.
searchUrl
.
replace
(
'**'
,
key
).
replace
(
'fypage'
,
pg
)
url
=
self
.
searchUrl
.
replace
(
'**'
,
key
).
replace
(
'fypage'
,
pg
)
if
not
str
(
url
).
startswith
(
'http'
):
url
=
urljoin
(
self
.
url
,
url
)
print
(
url
)
print
(
url
)
headers
=
{
'user-agent'
:
self
.
ua
}
r
=
requests
.
get
(
url
,
headers
=
self
.
headers
)
r
=
requests
.
get
(
url
,
headers
=
headers
)
html
=
r
.
text
html
=
r
.
text
p
=
self
.
搜索
.
split
(
';'
)
# 解析
if
not
self
.
搜索
:
return
self
.
blank
()
p
=
self
.
一级
.
split
(
';'
)
if
self
.
搜索
==
'*'
and
self
.
一级
else
self
.
搜索
.
split
(
';'
)
# 解析
if
len
(
p
)
<
5
:
return
self
.
blank
()
jsp
=
jsoup
(
self
.
url
)
jsp
=
jsoup
(
self
.
url
)
pdfh
=
jsp
.
pdfh
pdfh
=
jsp
.
pdfh
pdfa
=
jsp
.
pdfa
pdfa
=
jsp
.
pdfa
...
@@ -254,7 +368,9 @@ if __name__ == '__main__':
...
@@ -254,7 +368,9 @@ if __name__ == '__main__':
rule
=
ctx
.
eval
(
'rule'
)
rule
=
ctx
.
eval
(
'rule'
)
cms
=
CMS
(
rule
)
cms
=
CMS
(
rule
)
print
(
cms
.
title
)
print
(
cms
.
title
)
print
(
cms
.
homeContent
())
# print(cms.homeContent())
# print(cms.categoryContent('latest',1))
print
(
cms
.
detailContent
([
'https://hongkongdollvideo.com/video/b22c7cb6df40a3c4.html'
]))
# cms.categoryContent('dianying',1)
# cms.categoryContent('dianying',1)
# print(cms.detailContent(['67391']))
# print(cms.detailContent(['67391']))
# print(cms.searchContent('斗罗大陆'))
# print(cms.searchContent('斗罗大陆'))
\ No newline at end of file
readme.md
浏览文件 @
18652822
...
@@ -11,3 +11,6 @@
...
@@ -11,3 +11,6 @@
本地地址 clan://localhost/pycms_local.json
本地地址 clan://localhost/pycms_local.json
推荐把文件放到 /storage/emulated/0/PlutoPlayer/pycms_local.json
推荐把文件放到 /storage/emulated/0/PlutoPlayer/pycms_local.json
并且pluto要自己切换路径为PlutoPlayer
并且pluto要自己切换路径为PlutoPlayer
### 相关教程
[
pyquery定位
](
https://blog.csdn.net/Arise007/article/details/79513094
)
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录