drive.py 14.3 KB
Newer Older
H
Hsury 已提交
1 2 3 4 5 6 7 8
#!/usr/bin/env python3.7
# -*- coding: utf-8 -*-

import argparse
import hashlib
import json
import math
import os
H
Hsury 已提交
9
import re
H
Hsury 已提交
10
import requests
H
Hsury 已提交
11
import struct
H
Hsury 已提交
12
import threading
H
Hsury 已提交
13 14 15 16
import time
import types
from bilibili import Bilibili

H
Hsury 已提交
17 18
def default_url(sha1):
    """Return the image URL under which a block with this SHA-1 is stored."""
    return f"http://i0.hdslb.com/bfs/album/{sha1}.x-ms-bmp"


def meta_string(url):
    """Return the short ``bdrive://<sha1>`` form of a default-host image URL.

    Any URL that is not exactly of the form produced by ``default_url``
    (http or https) is returned unchanged.
    """
    # The dots are escaped so that look-alike hosts such as "i0xhdslb.com"
    # are not mistaken for the default host.
    matched = re.match(
        r"^http(s?)://i0\.hdslb\.com/bfs/album/([a-fA-F0-9]{40})\.x-ms-bmp$",
        url,
    )
    return "bdrive://" + matched.group(2) if matched else url
H
Hsury 已提交
19

H
Hsury 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
def bmp_header(data):
    """Build the 62-byte BMP header that is prepended to a payload.

    The header is laid out as a 14-byte file header, a 40-byte info header
    and an 8-byte two-entry palette, all little-endian. The payload length
    is stored as the image width, the height is 1 and the bit depth is 1.
    """
    payload_len = len(data)
    return struct.pack(
        "<2sl" "HH" "llll" "HH" "llllll" "8s",
        b"BM",
        14 + 40 + 8 + payload_len,   # total file size
        0, 0,                        # two reserved 16-bit fields
        0x3E,                        # offset of the payload (62)
        0x28,                        # info-header size (40)
        payload_len,                 # width
        1,                           # height
        1, 1,                        # planes, bits per pixel
        0,                           # compression
        (payload_len + 7) // 8,      # image size: payload length / 8, rounded up
        0, 0, 0, 0,                  # resolution x/y, colours used/important
        b"\x00\x00\x00\x00\xff\xff\xff\x00",  # palette
    )
H
Hsury 已提交
38

H
Hsury 已提交
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
def calc_sha1(data, hexdigest=False):
    """Compute the SHA-1 of a bytes-like object or of a generator of chunks.

    Returns the hex string when ``hexdigest`` is true, otherwise the raw
    digest bytes.
    """
    hasher = hashlib.sha1()
    # A generator is consumed chunk by chunk; anything else is hashed whole.
    pieces = data if isinstance(data, types.GeneratorType) else (data,)
    for piece in pieces:
        hasher.update(piece)
    if hexdigest:
        return hasher.hexdigest()
    return hasher.digest()

def fetch_meta(string):
    """Download and parse the metadata identified by ``string``.

    ``string`` may be ``bdrive://<sha1>``, a bare 40-character hex SHA-1, or
    an ``http://`` / ``https://`` URL.

    Returns the decoded JSON metadata, or ``None`` when the string has an
    unsupported form, the download fails, or the payload is not valid
    UTF-8 JSON.
    """
    sha1_match = re.match(r"^(?:bdrive://)?([a-fA-F0-9]{40})$", string)
    if sha1_match:
        full_meta = image_download(default_url(sha1_match.group(1)))
    elif string.startswith(("http://", "https://")):
        full_meta = image_download(string)
    else:
        return None
    if full_meta is None:
        # image_download reports a failed request as None.
        return None
    try:
        # The first 62 bytes are the BMP header added on upload.
        return json.loads(full_meta[62:].decode("utf-8"))
    except ValueError:
        # Covers both UnicodeDecodeError and json.JSONDecodeError without
        # swallowing KeyboardInterrupt/SystemExit like a bare except would.
        return None

H
Hsury 已提交
61
def image_upload(data, cookies):
    """POST ``data`` as a BMP file to the drawImage upload endpoint.

    Args:
        data: the bytes to send as the ``file_up`` form field.
        cookies: cookies passed through to ``requests.post``.

    Returns:
        The decoded JSON response, or ``None`` if the request or the JSON
        decoding failed.
    """
    url = "https://api.vc.bilibili.com/api/v1/drawImage/upload"
    headers = {
        'Origin': "https://t.bilibili.com",
        'Referer': "https://t.bilibili.com/",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
    }
    files = {
        # The file name is the current time in milliseconds.
        'file_up': (f"{int(time.time() * 1000)}.bmp", data),
        'biz': "draw",
        'category': "daily",
    }
    try:
        response = requests.post(url, headers=headers, cookies=cookies, files=files).json()
    except Exception:
        # Best effort: callers retry on None. Catching Exception rather than
        # using a bare except lets KeyboardInterrupt/SystemExit propagate.
        response = None
    return response

H
Hsury 已提交
79
def image_download(url):
    """Fetch ``url`` and return the response body as bytes.

    Returns ``None`` if the request raised an error.
    """
    try:
        response = requests.get(url).content
    except Exception:
        # Best effort: callers treat None as a failed download. Catching
        # Exception rather than using a bare except lets
        # KeyboardInterrupt/SystemExit propagate.
        response = None
    return response
H
Hsury 已提交
85

H
Hsury 已提交
86 87 88 89
def log(message):
    """Print ``message`` prefixed with the current local time in brackets."""
    # strftime() without an explicit time tuple formats the current local time.
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] {message}")

def read_history():
    """Load the upload history from ``history.json`` in the working directory.

    Returns the parsed JSON content, or an empty dict when the file is
    missing, unreadable, or not valid UTF-8 JSON.
    """
    try:
        with open("history.json", "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: bad encoding or JSON.
        # Narrower than a bare except, so Ctrl-C is no longer swallowed.
        return {}
H
Hsury 已提交
96

H
Hsury 已提交
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
def read_in_chunks(file_name, chunk_size=16 * 1024 * 1024):
    """Yield the contents of ``file_name`` as successive byte chunks.

    Each chunk is the result of one ``read(chunk_size)`` call; iteration
    stops at the first empty read. The file is opened in binary mode.
    """
    with open(file_name, "rb") as stream:
        # iter() with a sentinel stops as soon as read() returns b"".
        yield from iter(lambda: stream.read(chunk_size), b"")

def history_handle(args):
    """Print every entry of the upload history, or a notice when it is empty.

    ``args`` is accepted for the sub-command handler signature and is not used.
    """
    records = read_history()
    if not records:
        print("暂无上传历史记录")
        return
    for position, entry in enumerate(records.values()):
        tag = f"[{position}]"
        name = entry['filename']
        size_mb = entry['size'] / 1024 / 1024
        block_count = len(entry['block'])
        uploaded_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(entry['time']))
        print(f"{tag} {name} ({size_mb:.2f} MB), 共有{block_count}个分块, 上传于{uploaded_at}")
        # Second line is indented to line up under the file name.
        print(f"{' ' * len(tag)} {meta_string(entry['url'])}")
H
Hsury 已提交
115 116

def info_handle(args):
    """Fetch the metadata named by ``args.meta`` and log its fields.

    Logs the file name, size, SHA-1, upload time and block count, followed
    by one line per block. Logs an error line when the metadata cannot be
    fetched or parsed.
    """
    meta = fetch_meta(args.meta)
    if not meta:
        log("元数据解析出错")
        return
    log(f"文件名: {meta['filename']}")
    log(f"大小: {meta['size'] / 1024 / 1024:.2f} MB")
    log(f"SHA-1: {meta['sha1']}")
    log(f"上传时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta['time']))}")
    blocks = meta['block']
    log(f"分块数: {len(blocks)}")
    for number, entry in enumerate(blocks):
        log(f"分块{number} ({entry['size'] / 1024 / 1024:.2f} MB) URL: {entry['url']}")

H
Hsury 已提交
129 130 131 132 133 134 135
def login_handle(args):
    """Log in with ``args.username`` / ``args.password`` and save the cookies.

    When ``Bilibili.login`` returns a truthy value, the user info is fetched
    and the session cookies are written to ``cookies.json`` as indented JSON.
    Nothing is written otherwise.
    """
    client = Bilibili()
    logged_in = client.login(username=args.username, password=args.password)
    if not logged_in:
        return
    client.get_user_info()
    with open("cookies.json", "w", encoding="utf-8") as cookie_file:
        serialized = json.dumps(client.get_cookies(), ensure_ascii=False, indent=2)
        cookie_file.write(serialized)

H
Hsury 已提交
136
def upload_handle(args):
    """Upload ``args.file`` as BMP-wrapped blocks plus one metadata image.

    The file is split into blocks of ``args.block_size`` MiB. Each block is
    prefixed with a BMP header and uploaded by a worker thread, with at most
    ``args.thread`` workers running at once. A JSON metadata document that
    lists every block is uploaded last, and the result is recorded in
    ``history.json``.

    Returns:
        The metadata image URL on success (or the stored URL when the file's
        SHA-1 is already in the history), otherwise ``None``.
    """
    def core(index, block):
        # Worker: make block `index` available on the server and record it
        # in block_dict. On any failure, terminate_flag is set instead.
        try:
            block_sha1 = calc_sha1(block, hexdigest=True)
            full_block = bmp_header(block) + block
            full_block_sha1 = calc_sha1(full_block, hexdigest=True)
            url = skippable(full_block_sha1)
            if url:
                # An identical block is already on the server; reuse it.
                block_dict[index] = {
                    'url': url,
                    'size': len(block),
                    'sha1': block_sha1,
                }
                return
            for attempt in range(10):
                response = image_upload(full_block, cookies)
                if response:
                    if response['code'] == 0:
                        url = response['data']['image_url']
                        log(f"分块{index} ({len(block) / 1024 / 1024:.2f} MB) 上传完毕")
                        block_dict[index] = {
                            'url': url,
                            'size': len(block),
                            'sha1': block_sha1,
                        }
                        return
                    elif response['code'] == -4:
                        # The server rejected the session; retrying is pointless.
                        terminate_flag.set()
                        log(f"分块{index} ({len(block) / 1024 / 1024:.2f} MB) 第{attempt + 1}次上传失败, 请重新登录")
                        return
                log(f"分块{index} ({len(block) / 1024 / 1024:.2f} MB) 第{attempt + 1}次上传失败")
        finally:
            # Always signal completion. Releasing only on success left the
            # dispatcher blocked forever in done_flag.acquire() after a
            # failure. The flag is set before the release so the dispatcher
            # sees it as soon as it wakes up.
            if index not in block_dict:
                terminate_flag.set()
            done_flag.release()

    def skippable(sha1):
        # Return the block's URL if it already exists on the server, else None.
        url = default_url(sha1)
        try:
            response = requests.head(url)
        except requests.RequestException:
            # A failed probe only means the block cannot be skipped.
            return None
        return url if response.status_code == 200 else None

    def write_history(meta_dict, url):
        # Store the metadata (with its URL added) under the file's SHA-1.
        history = read_history()
        history[meta_dict['sha1']] = meta_dict
        history[meta_dict['sha1']]['url'] = url
        with open("history.json", "w", encoding="utf-8") as f:
            f.write(json.dumps(history, ensure_ascii=False, indent=2))

    start_time = time.time()
    try:
        with open("cookies.json", "r", encoding="utf-8") as f:
            cookies = json.loads(f.read())
    except (OSError, ValueError):
        # Missing/unreadable file or invalid JSON.
        log("Cookies加载失败, 请先登录")
        return None
    file_name = args.file
    log(f"上传: {os.path.basename(file_name)} ({os.path.getsize(file_name) / 1024 / 1024:.2f} MB)")
    sha1 = calc_sha1(read_in_chunks(file_name), hexdigest=True)
    log(f"SHA-1: {sha1}")
    history = read_history()
    if sha1 in history:
        url = history[sha1]['url']
        log(f"该文件已于{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(history[sha1]['time']))}上传, 共有{len(history[sha1]['block'])}个分块")
        log(meta_string(url))
        return url
    log(f"线程数: {args.thread}")
    done_flag = threading.Semaphore(0)   # one release per finished worker
    terminate_flag = threading.Event()   # set when any block fails
    thread_pool = []
    block_dict = {}                      # block index -> {'url', 'size', 'sha1'}
    blocks = read_in_chunks(file_name, chunk_size=args.block_size * 1024 * 1024)
    try:
        for index, block in enumerate(blocks):
            if len(thread_pool) >= args.thread:
                # Wait for one worker to finish before starting another.
                done_flag.acquire()
            if terminate_flag.is_set():
                # Stop dispatching: reading on would only wait on workers
                # that no longer exist.
                log("已终止上传, 等待线程回收")
                break
            thread_pool.append(threading.Thread(target=core, args=(index, block)))
            thread_pool[-1].start()
    finally:
        # Close the chunk generator so the file is released after a break.
        blocks.close()
    for thread in thread_pool:
        thread.join()
    if terminate_flag.is_set():
        return None
    meta_dict = {
        'time': int(time.time()),
        'filename': os.path.basename(file_name),
        'size': os.path.getsize(file_name),
        'sha1': sha1,
        'block': [block_dict[i] for i in range(len(block_dict))],
    }
    meta = json.dumps(meta_dict, ensure_ascii=False).encode("utf-8")
    full_meta = bmp_header(meta) + meta
    for attempt in range(10):
        response = image_upload(full_meta, cookies)
        if response and response['code'] == 0:
            url = response['data']['image_url']
            log("元数据上传完毕")
            log(f"{os.path.basename(file_name)}上传完毕, 共有{len(meta_dict['block'])}个分块, 用时{int(time.time() - start_time)}秒, 平均速度{meta_dict['size'] / 1024 / 1024 / (time.time() - start_time):.2f} MB/s")
            log(meta_string(url))
            write_history(meta_dict, url)
            return url
        log(f"元数据第{attempt + 1}次上传失败")
    return None
H
Hsury 已提交
240 241

def download_handle(args):
    """Download the file described by the metadata at ``args.meta``.

    Blocks are fetched by up to ``args.thread`` worker threads, checked
    against their recorded SHA-1 and written at their offset in the output
    file. If a file of the expected size already exists, only the blocks
    whose SHA-1 does not match are downloaded again. The finished file is
    checked against the SHA-1 recorded in the metadata.

    Returns:
        The output file name on success, otherwise ``None``.
    """
    def core(index, block_dict):
        # Worker: download block `index`, verify it and write it into `f`.
        succeeded = False
        try:
            for attempt in range(10):
                content = image_download(block_dict['url'])
                # image_download returns None on failure; slicing it directly
                # raised TypeError and killed the worker without any retry.
                block = content[62:] if content else b""
                if block:
                    if calc_sha1(block, hexdigest=True) == block_dict['sha1']:
                        with file_lock:
                            f.seek(block_offset(index))
                            f.write(block)
                        log(f"分块{index} ({block_dict['size'] / 1024 / 1024:.2f} MB) 下载完毕")
                        succeeded = True
                        return
                    else:
                        log(f"分块{index} ({block_dict['size'] / 1024 / 1024:.2f} MB) 校验未通过, SHA-1与元数据中的记录{block_dict['sha1']}不匹配")
                else:
                    log(f"分块{index} ({block_dict['size'] / 1024 / 1024:.2f} MB) 第{attempt + 1}次下载失败")
        finally:
            # Always signal completion. Releasing only on success left the
            # dispatcher blocked forever in done_flag.acquire() after a
            # failure. The flag is set before the release so the dispatcher
            # sees it as soon as it wakes up.
            if not succeeded:
                terminate_flag.set()
            done_flag.release()

    def block_offset(index):
        # Byte offset of block `index`: the total size of all earlier blocks.
        return sum(meta_dict['block'][i]['size'] for i in range(index))

    start_time = time.time()
    meta_dict = fetch_meta(args.meta)
    if meta_dict:
        # The metadata is remote, untrusted input: keep only the base name so
        # a crafted 'filename' cannot point outside the working directory.
        file_name = args.file if args.file else os.path.basename(meta_dict['filename'])
        log(f"下载: {os.path.basename(file_name)} ({meta_dict['size'] / 1024 / 1024:.2f} MB), 共有{len(meta_dict['block'])}个分块, 上传于{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta_dict['time']))}")
    else:
        log("元数据解析出错")
        return None
    log(f"线程数: {args.thread}")
    download_block_list = []
    if os.path.exists(file_name) and os.path.getsize(file_name) == meta_dict['size']:
        if calc_sha1(read_in_chunks(file_name), hexdigest=True) == meta_dict['sha1']:
            log(f"{os.path.basename(file_name)}已存在于本地")
            return file_name
        else:
            # Same size but different content: re-download only bad blocks.
            with open(file_name, "rb") as f:
                for index, block_dict in enumerate(meta_dict['block']):
                    f.seek(block_offset(index))
                    if calc_sha1(f.read(block_dict['size']), hexdigest=True) != block_dict['sha1']:
                        download_block_list.append(index)
    else:
        download_block_list = list(range(len(meta_dict['block'])))
    done_flag = threading.Semaphore(0)   # one release per finished worker
    terminate_flag = threading.Event()   # set when any block fails
    file_lock = threading.Lock()         # serialises seek + write on `f`
    thread_pool = []
    with open(file_name, "r+b" if os.path.exists(file_name) else "wb") as f:
        for index in download_block_list:
            if len(thread_pool) >= args.thread:
                # Wait for one worker to finish before starting another.
                done_flag.acquire()
            if terminate_flag.is_set():
                # Stop dispatching: going on would only wait on workers that
                # no longer exist.
                log("已终止下载, 等待线程回收")
                break
            thread_pool.append(threading.Thread(target=core, args=(index, meta_dict['block'][index])))
            thread_pool[-1].start()
        for thread in thread_pool:
            thread.join()
        if terminate_flag.is_set():
            return None
        # Cut off anything beyond the total size of all blocks.
        f.truncate(sum(block['size'] for block in meta_dict['block']))
    sha1 = calc_sha1(read_in_chunks(file_name), hexdigest=True)
    log(f"SHA-1: {sha1}")
    if sha1 == meta_dict['sha1']:
        log(f"{os.path.basename(file_name)}校验通过")
        log(f"{os.path.basename(file_name)}下载完毕, 用时{int(time.time() - start_time)}秒, 平均速度{meta_dict['size'] / 1024 / 1024 / (time.time() - start_time):.2f} MB/s")
        return file_name
    else:
        log(f"{os.path.basename(file_name)}校验未通过, SHA-1与元数据中的记录{meta_dict['sha1']}不匹配")
        return None
H
Hsury 已提交
318 319

if __name__ == "__main__":
    # Command-line entry point: one sub-command per handler function.
    parser = argparse.ArgumentParser(prog="BiliDrive", description="Bilibili Drive", epilog="By Hsury, 2019/10/25")
    subparsers = parser.add_subparsers()
    history_parser = subparsers.add_parser("history", help="view upload history")
    history_parser.set_defaults(func=history_handle)
    info_parser = subparsers.add_parser("info", help="view meta info")
    info_parser.add_argument("meta", help="meta url")
    info_parser.set_defaults(func=info_handle)
    login_parser = subparsers.add_parser("login", help="login to bilibili")
    login_parser.add_argument("username", help="username")
    login_parser.add_argument("password", help="password")
    login_parser.set_defaults(func=login_handle)
    upload_parser = subparsers.add_parser("upload", help="upload a file")
    upload_parser.add_argument("file", help="file name")
    upload_parser.add_argument("-b", "--block-size", default=4, type=int, help="block size in MB")
    upload_parser.add_argument("-t", "--thread", default=4, type=int, help="thread number")
    upload_parser.set_defaults(func=upload_handle)
    download_parser = subparsers.add_parser("download", help="download a file")
    download_parser.add_argument("meta", help="meta url")
    download_parser.add_argument("file", nargs="?", default="", help="save as file name")
    download_parser.add_argument("-t", "--thread", default=8, type=int, help="thread number")
    download_parser.set_defaults(func=download_handle)
    args = parser.parse_args()
    # `func` is only present when a sub-command was given. Looking it up
    # explicitly, rather than catching AttributeError around the call, keeps
    # an AttributeError raised inside a handler from being shown as help.
    handler = getattr(args, "func", None)
    if handler is None:
        parser.print_help()
    else:
        handler(args)