bilibili.py 5.6 KB
Newer Older
M
Mort Yao 已提交
1 2 3 4 5 6
#!/usr/bin/env python

# Public API of this module: only the top-level downloader is exported.
__all__ = ['bilibili_download']

from ..common import *

M
Mort Yao 已提交
7
from .sina import sina_download_by_vid
M
Mort Yao 已提交
8
from .tudou import tudou_download_by_id
M
Mort Yao 已提交
9
from .youku import youku_download_by_vid
M
Mort Yao 已提交
10

M
Mort Yao 已提交
11
import hashlib
M
Mort Yao 已提交
12 13
import re

M
Mort Yao 已提交
14 15 16
# API key pair provided by cnbeining. Both values are concatenated into the
# string that is MD5-signed for every playurl request made by this module.
appkey = '85eb6835b0a1034e'
secretkey = '2ad42749773c441109bdc0191257a664'
17 18 19 20 21 22 23
# HTTP request headers sent along with every playurl API request.
client = {
    "Accept": (
        "text/html,application/xhtml+xml,"
        "application/xml;q=0.9,*/*;q=0.8"
    ),
    "Accept-Charset": "UTF-8,*;q=0.5",
    "Accept-Encoding": "gzip,deflate,sdch",
    "Accept-Language": "en-US,en;q=0.8",
    "User-Agent": "Biligrab /0.8 (cnbeining@gmail.com)",
}
M
Mort Yao 已提交
24

M
Mort Yao 已提交
25
def get_srt_xml(id):
    """Fetch the comment XML document for a video.

    Args:
        id: Identifier interpolated into the comment.bilibili.com URL.

    Returns:
        The result of ``get_html`` for
        ``http://comment.bilibili.com/<id>.xml``.
    """
    url = 'http://comment.bilibili.com/%s.xml' % id
    return get_html(url)

def parse_srt_p(p):
    """Parse the comma-separated ``p`` attribute of one ``<d>`` comment element.

    The attribute must hold exactly eight fields, in this order: time, mode,
    font size, font colour, publish time, pool, user id, history.

    Args:
        p: The raw attribute string, e.g. ``'12.5,1,25,16777215,0,0,u,h'``.

    Returns:
        A tuple ``(pool, mode, font_size, font_color)`` where ``pool``,
        ``mode`` and ``font_size`` are ints and ``font_color`` is a
        ``'#rrggbb'`` hex string.

    Raises:
        AssertionError: If the field count is not 8, ``mode`` is outside
            1..8, or ``pool`` is outside 0..2.
        ValueError: If a numeric field cannot be converted.
    """
    fields = p.split(',')
    assert len(fields) == 8, fields
    time, mode, font_size, font_color, pub_time, pool, user_id, history = fields

    # The timestamp is not part of the result; converting it still rejects
    # malformed input with ValueError.
    time = float(time)

    mode = int(mode)
    assert 1 <= mode <= 8
    # mode 1~3: scrolling
    # mode 4: bottom
    # mode 5: top
    # mode 6: reverse?
    # mode 7: position
    # mode 8: advanced

    pool = int(pool)
    assert 0 <= pool <= 2
    # pool 0: normal
    # pool 1: srt
    # pool 2: special?

    font_size = int(font_size)

    # Decimal colour value -> zero-padded six-digit hex.
    font_color = '#%06x' % int(font_color)

    return pool, mode, font_size, font_color

def parse_srt_xml(xml):
    """Validate every ``<d p="...">`` comment in *xml*; conversion is unfinished.

    Each comment's ``p`` attribute is passed through ``parse_srt_p`` (which
    may raise on malformed data), but the parsed values are discarded.

    Raises:
        NotImplementedError: Always, once all comments have been checked.
    """
    comment_pattern = r'<d p="([^"]+)">(.*)</d>'
    for attributes, _text in re.findall(comment_pattern, xml):
        parse_srt_p(attributes)
    raise NotImplementedError()

62 63 64 65 66 67
def parse_cid_playurl(xml):
    """Extract the media URLs from a playurl XML response.

    Args:
        xml: The response document as a ``str``.

    Returns:
        A list with the text of the first ``<url>`` child of every
        ``<durl>`` element, in document order.
    """
    from xml.dom.minidom import parseString

    document = parseString(xml.encode('utf-8'))
    collected = []
    for segment in document.getElementsByTagName('durl'):
        url_element = segment.getElementsByTagName('url')[0]
        collected.append(url_element.firstChild.nodeValue)
    return collected

68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False):
    """Download a multi-part video given the cid of every part.

    The media URLs of all parts are gathered into one list and handed to
    ``download_urls`` together.

    Args:
        cids: Iterable of cid strings, one per part.
        title: Title passed to ``print_info`` and ``download_urls``.
        output_dir: Directory passed through to ``download_urls``.
        merge: Merge flag passed through to ``download_urls``.
        info_only: When true, only print the info and skip the download.
    """
    # Must exist before the loop: it is extended once per cid.
    urls = []
    for cid in cids:
        # The request is signed with the MD5 of the query string followed by
        # the secret key.
        sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + cid + secretkey, 'utf-8')).hexdigest()
        url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid + '&sign=' + sign_this
        # URLs on *.qqvideo.tc.qq.com are rewritten to vsrc.store.qq.com.
        urls += [i
                if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
                else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
                for i in parse_cid_playurl(get_content(url, headers=client))]

    # The container type is guessed from the first URL; flv is the fallback.
    if re.search(r'\.(flv|hlv)\b', urls[0]):
        type_ = 'flv'
    elif re.search(r'/flv/', urls[0]):
        type_ = 'flv'
    elif re.search(r'/mp4/', urls[0]):
        type_ = 'mp4'
    else:
        type_ = 'flv'

    # The summed size is only used for the info line.
    size = 0
    for url in urls:
        _, _, temp = url_info(url)
        size += temp

    print_info(site_info, title, type_, size)
    if not info_only:
        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)

def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False):
    """Download a single-part video given its cid.

    Args:
        id: The cid string of the video.
        title: Title passed to ``print_info`` and ``download_urls``.
        output_dir: Directory passed through to ``download_urls``.
        merge: Merge flag passed through to ``download_urls``.
        info_only: When true, only print the info and skip the download.
    """
    # The request is signed with the MD5 of the query string followed by the
    # secret key.
    sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + id + secretkey, 'utf-8')).hexdigest()
    url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + id + '&sign=' + sign_this
    # URLs on *.qqvideo.tc.qq.com are rewritten to vsrc.store.qq.com.
    urls = [i
            if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
            else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
            for i in parse_cid_playurl(get_content(url, headers=client))]

    # The container type is guessed from the first URL; flv is the fallback.
    if re.search(r'\.(flv|hlv)\b', urls[0]):
        type_ = 'flv'
    elif re.search(r'/flv/', urls[0]):
        type_ = 'flv'
    elif re.search(r'/mp4/', urls[0]):
        type_ = 'mp4'
    else:
        type_ = 'flv'

    # The summed size is only used for the info line.
    size = 0
    for url in urls:
        _, _, temp = url_info(url)
        size += temp

    print_info(site_info, title, type_, size)
    if not info_only:
        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
120

121
def bilibili_download(url, output_dir='.', merge=True, info_only=False):
    """Download the video on a bilibili page, plus its comment XML.

    The page's flashvars decide which backend handles the download: ``cid``
    uses the bilibili functions in this module, ``vid`` goes to sina,
    ``ykid`` to youku and ``uid`` to tudou.

    Args:
        url: URL of the video page.
        output_dir: Directory for the downloaded files.
        merge: Merge flag passed through to the backend downloader.
        info_only: When true, only print the info; nothing is downloaded and
            no comment file is written.

    Raises:
        AssertionError: If no flashvars can be found on a page.
        NotImplementedError: If the flashvars name an unknown source type.
    """
    html = get_html(url)

    title = r1(r'<h2[^>]*>([^<>]+)</h2>', html)
    title = unescape_html(title)
    title = escape_file_path(title)

    flashvars = r1_of([r'(cid=\d+)', r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
    assert flashvars
    # flashvars looks like "<type>=<id>[&...]"; keep only the first pair.
    t, source_id = flashvars.split('=', 1)
    source_id = source_id.split('&')[0]

    if t == 'cid':
        # Multi-P: each <option> value is the path of one part's page.
        parts = re.findall('<option value=\'([^\']*)\'>', html)
        if not parts:
            bilibili_download_by_cid(source_id, title, output_dir=output_dir, merge=merge, info_only=info_only)
        else:
            cids = []
            for part_path in parts:
                part_html = get_html("http://www.bilibili.com%s" % part_path)
                part_flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], part_html)
                # Fail the same way as the first lookup instead of with an
                # opaque AttributeError on None.
                assert part_flashvars
                _, cid = part_flashvars.split('=', 1)
                cids.append(cid.split('&')[0])
            bilibili_download_by_cids(cids, title, output_dir=output_dir, merge=merge, info_only=info_only)

    elif t == 'vid':
        # The sina module is imported as sina_download_by_vid.
        sina_download_by_vid(source_id, title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif t == 'ykid':
        youku_download_by_vid(source_id, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif t == 'uid':
        tudou_download_by_id(source_id, title, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
        raise NotImplementedError(flashvars)

    if not info_only:
        # NOTE: for multi-part videos only the comments of the id found on the
        # first page are saved.
        title = get_filename(title)
        print('Downloading %s ...\n' % (title + '.cmt.xml'))
        xml = get_srt_xml(source_id)
        with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x:
            x.write(xml)
M
Mort Yao 已提交
161

M
Mort Yao 已提交
162
# Site label passed to print_info() by the download functions in this module.
site_info = "bilibili.com"
M
Mort Yao 已提交
163 164
# Generic module-level entry point: an alias of bilibili_download.
download = bilibili_download
# playlist_not_supported comes from the ..common star import; presumably it
# builds a handler that rejects playlist downloads — confirm in ..common.
download_playlist = playlist_not_supported('bilibili')