bilibili.py 5.8 KB
Newer Older
M
Mort Yao 已提交
1 2 3 4 5 6
#!/usr/bin/env python

__all__ = ['bilibili_download']

from ..common import *

M
Mort Yao 已提交
7
from .sina import sina_download_by_vid
M
Mort Yao 已提交
8
from .tudou import tudou_download_by_id
M
Mort Yao 已提交
9
from .youku import youku_download_by_vid
M
Mort Yao 已提交
10

M
Mort Yao 已提交
11
import hashlib
M
Mort Yao 已提交
12 13
import re

M
Mort Yao 已提交
14 15 16
# API key provided by cnbeining
appkey = '85eb6835b0a1034e'
secretkey = '2ad42749773c441109bdc0191257a664'

# HTTP headers passed as ``headers=client`` when querying the playurl
# interface; the User-Agent string names the Biligrab client.
client = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'UTF-8,*;q=0.5',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)'
}
M
Mort Yao 已提交
24

M
Mort Yao 已提交
25
def get_srt_xml(id):
    """Fetch the comment XML document for one video part.

    Args:
        id: Identifier interpolated into the ``comment.bilibili.com`` URL.
            The parameter name shadows the builtin but is kept so that
            keyword callers continue to work.

    Returns:
        Whatever ``get_html`` returns for that URL (presumably the decoded
        XML text -- confirm against ``common.get_html``).
    """
    url = 'http://comment.bilibili.com/%s.xml' % id
    return get_html(url)

def parse_srt_p(p):
    """Parse the ``p`` attribute of a single ``<d>`` comment element.

    Args:
        p: Comma-separated string with exactly eight fields, in order:
            time, mode, font size, font color, publish time, pool,
            user id, history.

    Returns:
        Tuple ``(pool, mode, font_size, font_color)``: three ints followed
        by the color rendered as a ``'#rrggbb'`` hex string.

    Raises:
        AssertionError: If the field count is not 8, ``mode`` is outside
            1..8, or ``pool`` is outside 0..2.
        ValueError: If a numeric field cannot be converted.
    """
    fields = p.split(',')
    assert len(fields) == 8, fields
    # Only four of the eight fields are returned; the rest are unpacked to
    # pin the expected layout.
    time_field, mode, font_size, font_color, pub_time, pool, user_id, history = fields

    # Converted purely for validation: a non-numeric timestamp raises here.
    float(time_field)

    mode = int(mode)
    assert 1 <= mode <= 8
    # mode 1~3: scrolling
    # mode 4: bottom
    # mode 5: top
    # mode 6: reverse?
    # mode 7: position
    # mode 8: advanced

    pool = int(pool)
    assert 0 <= pool <= 2
    # pool 0: normal
    # pool 1: srt
    # pool 2: special?

    font_size = int(font_size)

    # The color arrives as a decimal integer; render it as '#rrggbb'.
    font_color = '#%06x' % int(font_color)

    return pool, mode, font_size, font_color

def parse_srt_xml(xml):
    """Validate every ``<d p="...">`` entry in a comment XML string.

    Each ``p`` attribute found is passed through ``parse_srt_p``; the parsed
    values are discarded. Conversion of the comments is not implemented, so
    the function always ends by raising.

    Raises:
        NotImplementedError: Always, once every entry has been parsed.
    """
    entries = re.findall(r'<d p="([^"]+)">(.*)</d>', xml)
    for attributes, _text in entries:
        parse_srt_p(attributes)
    raise NotImplementedError()

62 63
def parse_cid_playurl(xml):
    """Extract the media URLs from a playurl XML response.

    Args:
        xml: Response body as ``str``; it is UTF-8 encoded before parsing.

    Returns:
        List holding the text of the first ``<url>`` child of every
        ``<durl>`` element, in document order. An empty list is returned
        when the document cannot be parsed or lacks the expected structure.
    """
    from xml.dom.minidom import parseString
    try:
        doc = parseString(xml.encode('utf-8'))
        return [durl.getElementsByTagName('url')[0].firstChild.nodeValue
                for durl in doc.getElementsByTagName('durl')]
    except Exception:
        # Best-effort parse: malformed or unexpected responses yield "no
        # URLs". Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return []
70

71
def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False):
    """Download the media of several cids as one titled item.

    For every cid the playurl interface is queried with a signed request
    and the returned media URLs are collected in order.

    Args:
        cids: Iterable of cid strings (they are concatenated into the
            request, so they must be ``str``).
        title: Title passed to ``print_info`` and ``download_urls``.
        output_dir: Target directory handed to ``download_urls``.
        merge: Forwarded to ``download_urls``.
        info_only: When true, only print the info and skip the download.

    Raises:
        IndexError: If the interface returned no media URL for any cid
            (previously an uninformative ``list index out of range``).
    """
    urls = []
    for cid in cids:
        # Signature: md5 over the query string with the secret key appended.
        signature = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + cid + secretkey, 'utf-8')).hexdigest()
        api_url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid + '&sign=' + signature
        for media_url in parse_cid_playurl(get_content(api_url, headers=client)):
            # URLs on *.qqvideo.tc.qq.com are rewritten to vsrc.store.qq.com.
            if re.match(r'.*\.qqvideo\.tc\.qq\.com', media_url):
                media_url = re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', media_url)
            urls.append(media_url)

    if not urls:
        raise IndexError('no playable URLs returned for cids %r' % (cids,))

    # The container format is guessed from the first URL; flv is the default.
    first = urls[0]
    looks_flv = re.search(r'\.(flv|hlv)\b', first) or re.search(r'/flv/', first)
    if not looks_flv and re.search(r'/mp4/', first):
        ext = 'mp4'
    else:
        ext = 'flv'

    size = sum(part_size for _, _, part_size in map(url_info, urls))

    print_info(site_info, title, ext, size)
    if not info_only:
        # NOTE(review): total_size stays None as in the original even though
        # ``size`` is known -- presumably download_urls works it out itself;
        # confirm before changing.
        download_urls(urls, title, ext, total_size=None, output_dir=output_dir, merge=merge)

def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False):
    """Download the media belonging to a single cid.

    Args:
        id: cid string (concatenated into the request, so it must be
            ``str``). The name shadows the builtin but is part of the
            public signature.
        title: Title passed to ``print_info`` and ``download_urls``.
        output_dir: Target directory handed to ``download_urls``.
        merge: Forwarded to ``download_urls``.
        info_only: When true, only print the info and skip the download.

    Raises:
        IndexError: If the interface returned no media URL for the cid
            (previously an uninformative ``list index out of range``).
    """
    # Signature: md5 over the query string with the secret key appended.
    signature = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + id + secretkey, 'utf-8')).hexdigest()
    api_url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + id + '&sign=' + signature

    urls = []
    for media_url in parse_cid_playurl(get_content(api_url, headers=client)):
        # URLs on *.qqvideo.tc.qq.com are rewritten to vsrc.store.qq.com.
        if re.match(r'.*\.qqvideo\.tc\.qq\.com', media_url):
            media_url = re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', media_url)
        urls.append(media_url)

    if not urls:
        raise IndexError('no playable URLs returned for cid %r' % (id,))

    # The container format is guessed from the first URL; flv is the default.
    first = urls[0]
    looks_flv = re.search(r'\.(flv|hlv)\b', first) or re.search(r'/flv/', first)
    if not looks_flv and re.search(r'/mp4/', first):
        ext = 'mp4'
    else:
        ext = 'flv'

    size = sum(part_size for _, _, part_size in map(url_info, urls))

    print_info(site_info, title, ext, size)
    if not info_only:
        # NOTE(review): total_size stays None as in the original even though
        # ``size`` is known -- presumably download_urls works it out itself;
        # confirm before changing.
        download_urls(urls, title, ext, total_size=None, output_dir=output_dir, merge=merge)
124

125
def bilibili_download(url, output_dir='.', merge=True, info_only=False):
    """Download the video found on a bilibili page, plus its comment XML.

    The page is scraped for a title and for a ``<kind>=<id>`` token. The
    kind selects the backend: ``cid`` (bilibili itself, including pages
    that list several parts), ``vid`` (sina), ``ykid`` (youku) or ``uid``
    (tudou).

    Args:
        url: Page URL to scrape.
        output_dir: Directory for the media and the ``.cmt.xml`` file.
        merge: Forwarded to the backend downloader.
        info_only: When true, only print the info; nothing is written.

    Raises:
        AssertionError: If no id token is found in the page.
        NotImplementedError: If the token kind is not one of the above.
    """
    html = get_html(url)

    title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />', r'<h2[^>]*>([^<>]+)</h2>'], html)
    title = unescape_html(title)
    title = escape_file_path(title)

    flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
    assert flashvars
    # Normalise the "cid: 123" form to "cid=123" so one split handles both.
    flashvars = flashvars.replace(': ', '=')
    kind, vid = flashvars.split('=', 1)
    vid = vid.split('&')[0]

    if kind == 'cid':
        # Multi-P: each <option value='...'> is a site-relative link to one part.
        cids = [vid]
        parts = re.findall(r"<option value='([^']*)'>", html)
        if not parts:
            bilibili_download_by_cid(vid, title, output_dir=output_dir, merge=merge, info_only=info_only)
        else:
            for part_path in parts:
                part_html = get_html("http://www.bilibili.com%s" % part_path)
                part_vars = r1_of([r'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], part_html)
                if part_vars:
                    _, part_cid = part_vars.split('=', 1)
                    part_cid = part_cid.split('&')[0]
                    # The option list may include the part already seeded
                    # above; skip repeats so no part is fetched twice.
                    if part_cid not in cids:
                        cids.append(part_cid)
            bilibili_download_by_cids(cids, title, output_dir=output_dir, merge=merge, info_only=info_only)

    elif kind == 'vid':
        # The module imports sina_download_by_vid; the former call to
        # sina_download_by_id referenced a name this file never imports.
        sina_download_by_vid(vid, title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif kind == 'ykid':
        youku_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif kind == 'uid':
        tudou_download_by_id(vid, title, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
        raise NotImplementedError(flashvars)

    if not info_only:
        # Save the comment XML next to the media. For multi-part pages only
        # the id found on the first page is used.
        title = get_filename(title)
        print('Downloading %s ...\n' % (title + '.cmt.xml'))
        xml = get_srt_xml(vid)
        with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x:
            x.write(xml)
M
Mort Yao 已提交
167

M
Mort Yao 已提交
168
# Site label passed to print_info() by the download functions in this module.
site_info = "bilibili.com"
# Module-level aliases: single-URL entry point and playlist handler.
download = bilibili_download
download_playlist = playlist_not_supported('bilibili')