bilibili.py 4.0 KB
Newer Older
M
Mort Yao 已提交
1 2 3 4 5 6
#!/usr/bin/env python

__all__ = ['bilibili_download']

from ..common import *

M
Mort Yao 已提交
7
from .sina import sina_download_by_vid
M
Mort Yao 已提交
8
from .tudou import tudou_download_by_id
M
Mort Yao 已提交
9
from .youku import youku_download_by_vid
M
Mort Yao 已提交
10

M
Mort Yao 已提交
11
import hashlib
M
Mort Yao 已提交
12 13
import re

M
Mort Yao 已提交
14 15 16
# API key provided by cnbeining
appkey = '85eb6835b0a1034e'
secretkey = '2ad42749773c441109bdc0191257a664'

# HTTP headers passed to get_content() when querying the playurl interface.
client = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'UTF-8,*;q=0.5',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)'
}
M
Mort Yao 已提交
24

M
Mort Yao 已提交
25
def get_srt_xml(id):
    """Fetch the comment XML document for *id*.

    Builds the URL ``http://comment.bilibili.com/<id>.xml`` and returns
    whatever ``get_html`` returns for it.
    """
    url = 'http://comment.bilibili.com/%s.xml' % id
    return get_html(url)

def parse_srt_p(p):
    """Parse the comma-separated ``p`` attribute of a comment entry.

    The attribute must contain exactly eight fields, in this order:
    time, mode, font size, font color, publication time, pool, user id,
    history.

    Returns a tuple ``(pool, mode, font_size, font_color)`` where
    ``font_color`` is formatted as a ``#rrggbb`` hex string.

    Raises ``AssertionError`` when the field count, mode or pool is out of
    range, and ``ValueError`` when a numeric field cannot be converted.
    """
    fields = p.split(',')
    assert len(fields) == 8, fields
    time, mode, font_size, font_color, pub_time, pool, user_id, history = fields

    # Not returned, but the conversion still rejects a non-numeric timestamp.
    time = float(time)

    mode = int(mode)
    assert 1 <= mode <= 8
    # mode 1~3: scrolling
    # mode 4: bottom
    # mode 5: top
    # mode 6: reverse?
    # mode 7: position
    # mode 8: advanced

    pool = int(pool)
    assert 0 <= pool <= 2
    # pool 0: normal
    # pool 1: srt
    # pool 2: special?

    font_size = int(font_size)

    # The color arrives as a decimal integer; render it as 6-digit hex.
    font_color = '#%06x' % int(font_color)

    return pool, mode, font_size, font_color

def parse_srt_xml(xml):
    """Run every ``<d p="...">`` entry of *xml* through ``parse_srt_p``.

    Unfinished: after all entries have been parsed (any of which may raise
    on malformed input) the function always raises ``NotImplementedError``.
    """
    entries = re.findall(r'<d p="([^"]+)">(.*)</d>', xml)
    for attributes, _text in entries:
        parse_srt_p(attributes)
    raise NotImplementedError()

62 63 64 65 66 67 68
def parse_cid_playurl(xml):
    """Extract the segment URLs from a playurl XML response.

    For every ``<durl>`` element, the value of the first child node of its
    first ``<url>`` element is collected. Returns the values as a list, in
    document order.
    """
    from xml.dom import minidom

    document = minidom.parseString(xml.encode('utf-8'))
    urls = []
    for segment in document.getElementsByTagName('durl'):
        url_node = segment.getElementsByTagName('url')[0]
        urls.append(url_node.firstChild.nodeValue)
    return urls

def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_only = False):
    """Resolve the media URLs for a cid and print info / download them.

    The playurl request is signed with an MD5 digest of the query string
    followed by ``secretkey``. The XML response is parsed with
    ``parse_cid_playurl``; the container type is guessed from the first
    URL, the sizes reported by ``url_info`` are summed for ``print_info``,
    and, unless *info_only* is true, the URLs are handed to
    ``download_urls``.
    """
    query = 'appkey=' + appkey + '&cid=' + id
    sign_this = hashlib.md5((query + secretkey).encode('utf-8')).hexdigest()
    url = 'http://interface.bilibili.com/playurl?' + query + '&sign=' + sign_this

    # Dirty fix for QQ: rewrite *.qqvideo.tc.qq.com hosts to vsrc.store.qq.com.
    qq_host = r'.*\.qqvideo\.tc\.qq\.com'
    urls = []
    for candidate in parse_cid_playurl(get_content(url, headers=client)):
        if re.match(qq_host, candidate):
            candidate = re.sub(qq_host, 'http://vsrc.store.qq.com', candidate)
        urls.append(candidate)

    # Guess the container from the first segment; anything unrecognized is
    # treated as flv.
    first = urls[0]
    if re.search(r'\.(flv|hlv)\b', first) or re.search(r'/flv/', first):
        ext = 'flv'
    elif re.search(r'/mp4/', first):
        ext = 'mp4'
    else:
        ext = 'flv'

    size = 0
    for segment_url in urls:
        _, _, segment_size = url_info(segment_url)
        size += segment_size

    print_info(site_info, title, ext, size)
    if not info_only:
        # total_size is passed as None, exactly as before; the summed size is
        # only used for the info display above.
        download_urls(urls, title, ext, total_size = None, output_dir = output_dir, merge = merge)
91

M
Mort Yao 已提交
92 93
def bilibili_download(url, output_dir = '.', merge = True, info_only = False):
    """Download the video embedded in a bilibili page, plus its comments.

    The page title is taken from the first ``<h2>`` element. The player
    parameters found in the page have the form ``<type>=<id>[&...]``; the
    type selects the backend:

    * ``cid``  -> ``bilibili_download_by_cid``
    * ``vid``  -> ``sina_download_by_vid``
    * ``ykid`` -> ``youku_download_by_vid``
    * ``uid``  -> ``tudou_download_by_id``

    Any other type raises ``NotImplementedError``. Unless *info_only* is
    true, the comment XML for the id is then written to
    ``<title>.cmt.xml`` inside *output_dir*.
    """
    html = get_html(url)

    title = r1(r'<h2[^>]*>([^<>]+)</h2>', html)
    title = unescape_html(title)
    title = escape_file_path(title)

    flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
    assert flashvars
    kind, video_id = flashvars.split('=', 1)
    # Drop any further parameters that follow the id.
    video_id = video_id.split('&')[0]

    if kind == 'cid':
        bilibili_download_by_cid(video_id, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif kind == 'vid':
        # Fixed: the name imported from .sina is sina_download_by_vid; the
        # previous call to sina_download_by_id raised NameError.
        sina_download_by_vid(video_id, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif kind == 'ykid':
        youku_download_by_vid(video_id, title=title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif kind == 'uid':
        tudou_download_by_id(video_id, title, output_dir = output_dir, merge = merge, info_only = info_only)
    else:
        raise NotImplementedError(flashvars)

    if not info_only:
        title = get_filename(title)
        print('Downloading %s ...\n' % (title + '.cmt.xml'))
        xml = get_srt_xml(video_id)
        with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x:
            x.write(xml)
M
Mort Yao 已提交
120

M
Mort Yao 已提交
121
# Site name handed to print_info() when reporting a video.
site_info = "bilibili.com"
# Generic module-level entry points; presumably these are the names the
# surrounding extractor framework looks up -- confirm against ..common.
download = bilibili_download
download_playlist = playlist_not_supported('bilibili')