Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
无聊人士张
you-get
提交
07a224ed
Y
you-get
项目概览
无聊人士张
/
you-get
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Y
you-get
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
07a224ed
编写于
7月 21, 2014
作者:
M
Mort Yao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Catfun: reformat
上级
f78e4c80
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
45 addition
and
49 deletion
+45
-49
src/you_get/extractors/catfun.py
src/you_get/extractors/catfun.py
+45
-49
未找到文件。
src/you_get/extractors/catfun.py
浏览文件 @
07a224ed
...
...
@@ -8,73 +8,69 @@ from ..common import *
from
xml.dom.minidom
import
*
def
parse_item
(
item
):
if
item
[
"type"
]
==
"youku"
:
page
=
get_content
(
"http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_youku_video_info&youku_id="
+
item
[
"vid"
])
dom
=
parseString
(
page
)
ext
=
dom
.
getElementsByTagName
(
"format"
)[
0
].
firstChild
.
nodeValue
;
size
=
0
urls
=
[]
if
item
[
"type"
]
==
"youku"
:
page
=
get_content
(
"http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_youku_video_info&youku_id="
+
item
[
"vid"
])
dom
=
parseString
(
page
)
ext
=
dom
.
getElementsByTagName
(
"format"
)[
0
].
firstChild
.
nodeValue
;
size
=
0
urls
=
[]
for
i
in
dom
.
getElementsByTagName
(
"durl"
):
urls
.
append
(
i
.
getElementsByTagName
(
"url"
)[
0
].
firstChild
.
nodeValue
)
size
+=
int
(
i
.
getElementsByTagName
(
"size"
)[
0
].
firstChild
.
nodeValue
);
return
urls
,
ext
,
size
pass
size
+=
int
(
i
.
getElementsByTagName
(
"size"
)[
0
].
firstChild
.
nodeValue
);
return
urls
,
ext
,
size
elif
item
[
"type"
]
==
"qq"
:
page
=
get_content
(
"http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_qq_video_info&qq_id="
+
item
[
"vid"
])
dom
=
parseString
(
page
)
size
=
0
urls
=
[]
elif
item
[
"type"
]
==
"qq"
:
page
=
get_content
(
"http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_qq_video_info&qq_id="
+
item
[
"vid"
])
dom
=
parseString
(
page
)
size
=
0
urls
=
[]
for
i
in
dom
.
getElementsByTagName
(
"durl"
):
url
=
i
.
getElementsByTagName
(
"url"
)[
0
].
firstChild
.
nodeValue
urls
.
append
(
url
)
vtype
,
ext
,
_size
=
url_info
(
url
)
size
+=
_size
return
urls
,
ext
,
size
pass
url
=
i
.
getElementsByTagName
(
"url"
)[
0
].
firstChild
.
nodeValue
urls
.
append
(
url
)
vtype
,
ext
,
_size
=
url_info
(
url
)
size
+=
_size
return
urls
,
ext
,
size
elif
item
[
"type"
]
==
"sina"
:
page
=
get_content
(
"http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_sina_video_info&sina_id="
+
item
[
"vid"
])
elif
item
[
"type"
]
==
"sina"
:
page
=
get_content
(
"http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_sina_video_info&sina_id="
+
item
[
"vid"
])
try
:
dom
=
parseString
(
page
)
dom
=
parseString
(
page
)
except
:
#refresh page encountered
page
=
get_content
(
match1
(
page
,
r
'url=(.+?)"'
))
dom
=
parseString
(
page
)
size
=
0
urls
=
[]
page
=
get_content
(
match1
(
page
,
r
'url=(.+?)"'
))
dom
=
parseString
(
page
)
size
=
0
urls
=
[]
for
i
in
dom
.
getElementsByTagName
(
"durl"
):
url
=
i
.
getElementsByTagName
(
"url"
)[
0
].
firstChild
.
nodeValue
url
=
i
.
getElementsByTagName
(
"url"
)[
0
].
firstChild
.
nodeValue
urls
.
append
(
url
)
vtype
,
ext
,
_size
=
url_info
(
url
)
vtype
,
ext
,
_size
=
url_info
(
url
)
if
not
ext
:
ext
=
match1
(
url
,
r
'\.(\w+?)\?'
)
size
+=
_size
#sina's result does not contains content-type
return
urls
,
ext
,
size
pass
ext
=
match1
(
url
,
r
'\.(\w+?)\?'
)
size
+=
_size
#sina's result does not contains content-type
return
urls
,
ext
,
size
def
catfun_download
(
url
,
output_dir
=
'.'
,
merge
=
True
,
info_only
=
False
):
# html
=
get_content(url)
title
=
match1
(
get_content
(
url
),
r
'<h1 class="title">(.+?)</h1>'
)
vid
=
match1
(
url
,
r
"v\d+/cat(\d+)"
)
j
=
json
.
loads
(
get_content
(
"http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_video&modelid=11&id={}"
.
format
(
vid
)))
# html
=
get_content(url)
title
=
match1
(
get_content
(
url
),
r
'<h1 class="title">(.+?)</h1>'
)
vid
=
match1
(
url
,
r
"v\d+/cat(\d+)"
)
j
=
json
.
loads
(
get_content
(
"http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_video&modelid=11&id={}"
.
format
(
vid
)))
for
item
in
j
:
if
item
[
"name"
]
!=
"
\u672a\u547d\u540d
1"
:
t
=
title
+
"-"
+
item
[
"name"
]
if
item
[
"name"
]
!=
"
\u672a\u547d\u540d
1"
:
t
=
title
+
"-"
+
item
[
"name"
]
else
:
t
=
title
if
item
[
"type"
]
==
"tudou"
:
t
=
title
if
item
[
"type"
]
==
"tudou"
:
tudou_download_by_id
(
item
[
"vid"
],
title
,
output_dir
,
merge
,
info_only
)
else
:
urls
,
ext
,
size
=
parse_item
(
item
)
download_urls
(
urls
,
t
,
ext
,
size
,
output_dir
)
urls
,
ext
,
size
=
parse_item
(
item
)
print_info
(
site_info
,
title
,
ext
,
size
)
if
not
info_only
:
download_urls
(
urls
,
t
,
ext
,
size
,
output_dir
,
merge
=
merge
)
site_info
=
"
catfun.com
"
site_info
=
"
CatFun.tv
"
download
=
catfun_download
download_playlist
=
playlist_not_supported
(
'catfun'
)
\ No newline at end of file
download_playlist
=
playlist_not_supported
(
'catfun'
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录