Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
没毛的刷子
dr_py
提交
35c816d4
dr_py
项目概览
没毛的刷子
/
dr_py
与 Fork 源项目一致
Fork自
晚风拂柳颜 / dr_py
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
dr_py
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
35c816d4
编写于
8月 25, 2022
作者:
H
hjdhnx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
完成cms二级数据封装
上级
15679fc0
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
111 addition
and
11 deletion
+111
-11
js/鸭奈飞.js
js/鸭奈飞.js
+3
-1
models/cms.py
models/cms.py
+96
-4
utils/htmlParser.py
utils/htmlParser.py
+12
-6
未找到文件。
js/鸭奈飞.js
浏览文件 @
35c816d4
var
rule
=
{
var
rule
=
{
title
:
'
鸭奈飞
'
,
title
:
'
鸭奈飞
'
,
url
:
'
https://yanetflix.com/vodshow/fyclass--------fypage---.html
'
,
url
:
'
https://yanetflix.com/vodshow/fyclass--------fypage---.html
'
,
detailUrl
:
'
https://yanetflix.com/voddetail/fyid.html
'
,
// url:'https://yanetflix.com/vodshow/',
// url:'https://yanetflix.com/vodshow/',
searchUrl
:
'
/vodsearch/**----------fypage---.html
'
,
searchUrl
:
'
/vodsearch/**----------fypage---.html
'
,
ua
:
'
MOBILE_UA
'
,
ua
:
'
MOBILE_UA
'
,
class_name
:
'
电影&连续剧&综艺&动漫
'
,
class_name
:
'
电影&连续剧&综艺&动漫
'
,
class_url
:
'
dianying&lianxuju&zongyi&dongman
'
,
class_url
:
'
dianying&lianxuju&zongyi&dongman
'
,
一级
:
'
body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href
'
,
一级
:
'
body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href
'
,
二级
:
''
,
二级
:
{
"
title
"
:
"
h1&&Text;.module-info-tag&&Text
"
,
"
img
"
:
"
.lazyload&&data-original
"
,
"
desc
"
:
"
.module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text
"
,
"
content
"
:
"
.module-info-introduction&&Text
"
,
"
tabs
"
:
"
.module-tab-item
"
,
"
lists
"
:
"
.module-play-list:eq(#id) a
"
}
,
搜索
:
''
,
搜索
:
''
,
}
}
\ No newline at end of file
models/cms.py
浏览文件 @
35c816d4
...
@@ -12,6 +12,7 @@ from utils.htmlParser import jsoup
...
@@ -12,6 +12,7 @@ from utils.htmlParser import jsoup
class
CMS
:
class
CMS
:
def
__init__
(
self
,
rule
):
def
__init__
(
self
,
rule
):
self
.
url
=
rule
.
get
(
'url'
,
''
).
rstrip
(
'/'
)
self
.
url
=
rule
.
get
(
'url'
,
''
).
rstrip
(
'/'
)
self
.
detailUrl
=
rule
.
get
(
'detailUrl'
,
''
).
rstrip
(
'/'
)
self
.
searchUrl
=
rule
.
get
(
'searchUrl'
,
''
)
self
.
searchUrl
=
rule
.
get
(
'searchUrl'
,
''
)
ua
=
rule
.
get
(
'ua'
,
''
)
ua
=
rule
.
get
(
'ua'
,
''
)
if
ua
==
'MOBILE_UA'
:
if
ua
==
'MOBILE_UA'
:
...
@@ -101,8 +102,9 @@ class CMS:
...
@@ -101,8 +102,9 @@ class CMS:
pdfh
=
jsp
.
pdfh
pdfh
=
jsp
.
pdfh
pdfa
=
jsp
.
pdfa
pdfa
=
jsp
.
pdfa
pd
=
jsp
.
pd
pd
=
jsp
.
pd
print
(
pdfh
(
r
.
text
,
p
[
0
]))
# print(pdfh(r.text,'body a.module-poster-item.module-item:eq(1)&&Text'))
# print(pdfh(r.text,'body a.module-poster-item.module-item:eq(0)'))
# print(pdfh(r.text,'body a.module-poster-item.module-item:first'))
items
=
pdfa
(
r
.
text
,
p
[
0
])
items
=
pdfa
(
r
.
text
,
p
[
0
])
videos
=
[]
videos
=
[]
for
item
in
items
:
for
item
in
items
:
...
@@ -127,6 +129,95 @@ class CMS:
...
@@ -127,6 +129,95 @@ class CMS:
result
[
'total'
]
=
999999
result
[
'total'
]
=
999999
return
result
return
result
def
detailContent
(
self
,
array
):
"""
cms二级数据
:param array:
:return:
"""
# video-info-header
fyid
=
array
[
0
]
url
=
self
.
detailUrl
.
replace
(
'fyid'
,
fyid
)
print
(
url
)
headers
=
{
'user-agent'
:
self
.
ua
}
r
=
requests
.
get
(
url
,
headers
=
headers
)
html
=
r
.
text
# print(html)
p
=
self
.
二级
# 解析
jsp
=
jsoup
(
self
.
url
)
pdfh
=
jsp
.
pdfh
pdfa
=
jsp
.
pdfa
pd
=
jsp
.
pd
pq
=
jsp
.
pq
obj
=
{}
vod_name
=
''
if
p
.
get
(
'title'
):
p1
=
p
[
'title'
].
split
(
';'
)
vod_name
=
pdfh
(
html
,
p1
[
0
]).
replace
(
'
\n
'
,
' '
)
title
=
'
\n
'
.
join
([
pdfh
(
html
,
i
).
replace
(
'
\n
'
,
' '
)
for
i
in
p1
])
# print(title)
obj
[
'title'
]
=
title
if
p
.
get
(
'desc'
):
p1
=
p
[
'desc'
].
split
(
';'
)
desc
=
'
\n
'
.
join
([
pdfh
(
html
,
i
).
replace
(
'
\n
'
,
' '
)
for
i
in
p1
])
obj
[
'desc'
]
=
desc
if
p
.
get
(
'content'
):
p1
=
p
[
'content'
].
split
(
';'
)
content
=
'
\n
'
.
join
([
pdfh
(
html
,
i
).
replace
(
'
\n
'
,
' '
)
for
i
in
p1
])
obj
[
'content'
]
=
content
if
p
.
get
(
'img'
):
p1
=
p
[
'img'
].
split
(
';'
)
img
=
'
\n
'
.
join
([
pdfh
(
html
,
i
).
replace
(
'
\n
'
,
' '
)
for
i
in
p1
])
obj
[
'img'
]
=
img
vod
=
{
"vod_id"
:
fyid
,
"vod_name"
:
vod_name
,
"vod_pic"
:
obj
.
get
(
'img'
,
''
),
"type_name"
:
obj
.
get
(
'title'
,
''
),
"vod_year"
:
""
,
"vod_area"
:
""
,
"vod_remarks"
:
obj
.
get
(
'desc'
,
''
),
"vod_actor"
:
""
,
"vod_director"
:
""
,
"vod_content"
:
obj
.
get
(
'content'
,
''
)
}
vod_play_from
=
'$$$'
playFrom
=
[]
if
p
.
get
(
'tabs'
):
vodHeader
=
pdfa
(
html
,
p
[
'tabs'
])
vodHeader
=
[
pq
(
v
).
text
()
for
v
in
vodHeader
]
else
:
vodHeader
=
[
'道长在线'
]
for
v
in
vodHeader
:
playFrom
.
append
(
v
)
vod_play_from
=
vod_play_from
.
join
(
playFrom
)
vod_play_url
=
'$$$'
vod_tab_list
=
[]
if
p
.
get
(
'lists'
):
for
i
in
range
(
len
(
vodHeader
)):
p1
=
p
[
'lists'
].
replace
(
'#id'
,
str
(
i
))
vodList
=
pdfa
(
html
,
p1
)
# 1条线路的选集列表
vodList
=
[
pq
(
i
).
text
()
+
'$'
+
pd
(
i
,
'a&&href'
)
for
i
in
vodList
]
# 拼接成 名称$链接
vlist
=
'#'
.
join
(
vodList
)
# 拼多个选集
vod_tab_list
.
append
(
vlist
)
vod_play_url
=
vod_play_url
.
join
(
vod_tab_list
)
# print(vod_play_url)
vod
[
'vod_play_from'
]
=
vod_play_from
vod
[
'vod_play_url'
]
=
vod_play_url
result
=
{
'list'
:
[
vod
]
}
return
result
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
from
utils
import
parser
from
utils
import
parser
js_path
=
f
'js/鸭奈飞.js'
js_path
=
f
'js/鸭奈飞.js'
...
@@ -134,5 +225,6 @@ if __name__ == '__main__':
...
@@ -134,5 +225,6 @@ if __name__ == '__main__':
rule
=
ctx
.
eval
(
'rule'
)
rule
=
ctx
.
eval
(
'rule'
)
cms
=
CMS
(
rule
)
cms
=
CMS
(
rule
)
print
(
cms
.
title
)
print
(
cms
.
title
)
print
(
cms
.
homeContent
())
# print(cms.homeContent())
cms
.
categoryContent
(
'dianying'
,
1
)
# cms.categoryContent('dianying',1)
\ No newline at end of file
print
(
cms
.
detailContent
([
'67391'
]))
\ No newline at end of file
utils/htmlParser.py
浏览文件 @
35c816d4
...
@@ -18,8 +18,8 @@ class jsoup:
...
@@ -18,8 +18,8 @@ class jsoup:
option
=
parse
.
split
(
'&&'
)[
1
]
option
=
parse
.
split
(
'&&'
)[
1
]
parse
=
parse
.
split
(
'&&'
)[
0
]
parse
=
parse
.
split
(
'&&'
)[
0
]
ret
=
doc
(
parse
)
if
option
:
if
option
:
ret
=
doc
(
parse
)
if
option
==
'Text'
:
if
option
==
'Text'
:
ret
=
ret
.
text
()
ret
=
ret
.
text
()
elif
option
==
'Html'
:
elif
option
==
'Html'
:
...
@@ -29,16 +29,22 @@ class jsoup:
...
@@ -29,16 +29,22 @@ class jsoup:
if
pd
and
option
in
[
'url'
,
'src'
,
'href'
,
'data-original'
]:
if
pd
and
option
in
[
'url'
,
'src'
,
'href'
,
'data-original'
]:
ret
=
urljoin
(
self
.
MY_URL
,
ret
)
ret
=
urljoin
(
self
.
MY_URL
,
ret
)
else
:
else
:
ret
=
ret
.
next
()
# ret = doc(parse+':first')
print
(
ret
)
ret
=
doc
(
parse
)
# 由于是生成器,直接转str就能拿到第一条数据,不需要next
ret
=
str
(
ret
(
'fisrt'
))
# ret = ret.next() # 取第一条数据
# ret = doc(parse) # 下面注释的写法不对的
# ret = ret.find(':first')
# ret = ret.children(':first')
ret
=
str
(
ret
)
return
ret
return
ret
def
pdfa
(
self
,
html
,
parse
):
def
pdfa
(
self
,
html
,
parse
):
doc
=
pq
(
html
)
doc
=
pq
(
html
)
# print(doc(parse)[0])
# return [item.html() for item in doc(parse).items()]
# return [item.html() for item in doc(parse).items()]
return
[
str
(
item
)
for
item
in
doc
(
parse
).
items
()]
return
[
str
(
item
)
for
item
in
doc
(
parse
).
items
()]
def
pd
(
self
,
html
,
parse
):
def
pd
(
self
,
html
,
parse
):
return
self
.
pdfh
(
html
,
parse
,
True
)
return
self
.
pdfh
(
html
,
parse
,
True
)
\ No newline at end of file
def
pq
(
self
,
html
):
return
pq
(
html
)
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录