drive.py 17.3 KB
Newer Older
H
Hsury 已提交
1 2 3 4 5 6 7 8
#!/usr/bin/env python3.7
# -*- coding: utf-8 -*-

import argparse
import hashlib
import json
import math
import os
H
Hsury 已提交
9
import re
H
Hsury 已提交
10
import requests
H
Hsury 已提交
11
import shlex
H
Hsury 已提交
12
import signal
H
Hsury 已提交
13
import struct
H
Hsury 已提交
14
import sys
H
Hsury 已提交
15
import threading
H
Hsury 已提交
16
import time
H
Hsury 已提交
17
import traceback
H
Hsury 已提交
18 19 20
import types
from bilibili import Bilibili

H
Hsury 已提交
21 22
# Directory used for cookies.json and history.json: next to the executable
# when the interpreter reports itself as frozen (sys.frozen is truthy),
# otherwise next to this source file.
if getattr(sys, "frozen", False):
    bundle_dir = os.path.dirname(sys.executable)
else:
    bundle_dir = os.path.dirname(os.path.abspath(__file__))

H
Hsury 已提交
23 24
def default_url(sha1):
    """Return the i0.hdslb.com album URL for the image with SHA-1 hex digest ``sha1``."""
    return f"http://i0.hdslb.com/bfs/album/{sha1}.x-ms-bmp"


def meta_string(url):
    """Return the short ``bdrive://<sha1>`` form of a default album URL.

    URLs that are not exactly ``http(s)://i0.hdslb.com/bfs/album/<40 hex>.x-ms-bmp``
    are returned unchanged.
    """
    # Dots are escaped so that look-alike hosts (e.g. "i0xhdslb.com") are not
    # silently rewritten into a bdrive:// link pointing at the default host.
    match = re.match(r"^https?://i0\.hdslb\.com/bfs/album/([a-fA-F0-9]{40})\.x-ms-bmp$", url)
    return f"bdrive://{match.group(1)}" if match else url
H
Hsury 已提交
25

H
Hsury 已提交
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
def bmp_header(data):
    """Build the 62-byte BMP header that is prepended to ``data``.

    The header describes a 1-bit-per-pixel image that is ``len(data)`` pixels
    wide and 1 pixel high, followed by a two-entry colour table.
    Little-endian signed 32-bit fields are used for the sizes, so very large
    payloads raise ``struct.error``.
    """
    payload_len = len(data)
    header_len = 14 + 40 + 8
    colour_table = b"\x00\x00\x00\x00\xff\xff\xff\x00"
    return struct.pack(
        "<2slHHl" "lllHH" "llllll" "8s",
        # 14-byte file header: magic, total size, two reserved words, data offset
        b"BM", header_len + payload_len, 0, 0, header_len,
        # info header part 1: header size, width, height, planes, bits per pixel
        40, payload_len, 1, 1, 1,
        # info header part 2: compression, image size, then four zeroed fields
        0, (payload_len + 7) // 8, 0, 0, 0, 0,
        colour_table,
    )
H
Hsury 已提交
44

H
Hsury 已提交
45 46 47 48 49 50 51 52 53 54
def calc_sha1(data, hexdigest=False):
    """Compute the SHA-1 of ``data``.

    ``data`` is either a single bytes-like object or a generator yielding
    bytes-like chunks, which are hashed in order. Returns the hex string when
    ``hexdigest`` is true, otherwise the raw digest bytes.
    """
    hasher = hashlib.sha1()
    # Treat a plain buffer as a one-element sequence so both cases share a loop.
    pieces = data if isinstance(data, types.GeneratorType) else (data,)
    for piece in pieces:
        hasher.update(piece)
    if hexdigest:
        return hasher.hexdigest()
    return hasher.digest()

def fetch_meta(string):
    """Download and decode the JSON metadata referenced by ``string``.

    ``string`` may be a ``bdrive://<sha1>`` link, a bare 40-character hex
    SHA-1, or an ``http(s)://`` URL of the metadata image.

    Returns the decoded JSON value, or ``None`` when the reference is not
    recognised, the download fails, or the payload is not valid UTF-8 JSON.
    """
    if re.match(r"^bdrive://[a-fA-F0-9]{40}$", string) or re.match(r"^[a-fA-F0-9]{40}$", string):
        full_meta = image_download(default_url(re.findall(r"[a-fA-F0-9]{40}", string)[0]))
    elif string.startswith("http://") or string.startswith("https://"):
        full_meta = image_download(string)
    else:
        return None
    # image_download signals failure with None; handle it explicitly instead
    # of relying on an exception from slicing None.
    if full_meta is None:
        return None
    try:
        # The first 62 bytes are the BMP header; the JSON document follows.
        return json.loads(full_meta[62:].decode("utf-8"))
    except ValueError:
        # Covers both UnicodeDecodeError and json.JSONDecodeError.
        return None

H
Hsury 已提交
67
def image_upload(data, cookies):
    """POST ``data`` as a ``.bmp`` file to the bilibili drawImage upload API.

    ``cookies`` is passed straight to ``requests.post``.

    Returns the decoded JSON response, or ``None`` if the request or the JSON
    decoding fails (callers retry on ``None``).
    """
    url = "https://api.vc.bilibili.com/api/v1/drawImage/upload"
    headers = {
        'Origin': "https://t.bilibili.com",
        'Referer': "https://t.bilibili.com/",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36",
    }
    files = {
        # File name is the current time in milliseconds.
        'file_up': (f"{int(time.time() * 1000)}.bmp", data),
        'biz': "draw",
        'category': "daily",
    }
    try:
        return requests.post(url, headers=headers, cookies=cookies, files=files).json()
    except Exception:
        # Deliberate best-effort: any request/decoding failure means "retry".
        # Unlike a bare except, KeyboardInterrupt and SystemExit still propagate.
        return None

H
Hsury 已提交
85
def image_download(url):
    """Download ``url`` and return its body as bytes, or ``None`` on failure.

    The body is streamed in 64 KiB chunks. The download is abandoned (returning
    ``None``) when a chunk arrives more than 5 seconds after the previous one,
    or when the request raises.
    """
    headers = {
        'Referer': "http://t.bilibili.com/",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36",
    }
    content = []
    last_chunk_time = None
    try:
        # The context manager closes the streamed response on every exit path,
        # including the early return below, so the connection is not leaked.
        with requests.get(url, headers=headers, timeout=10, stream=True) as response:
            for chunk in response.iter_content(64 * 1024):
                if last_chunk_time is not None and time.time() - last_chunk_time > 5:
                    return None
                content.append(chunk)
                last_chunk_time = time.time()
        return b"".join(content)
    except Exception:
        # Deliberate best-effort: callers treat None as "try again".
        # Unlike a bare except, KeyboardInterrupt and SystemExit still propagate.
        return None
H
Hsury 已提交
101

H
Hsury 已提交
102
def log(message):
    """Hand ``message`` to ``Bilibili._log``."""
    emit = Bilibili._log
    emit(message)
H
Hsury 已提交
104 105

def read_history():
    """Load the upload history from ``history.json`` in ``bundle_dir``.

    Returns the stored dict, or an empty dict when the file is missing,
    unreadable, not valid UTF-8 JSON, or does not contain a JSON object.
    """
    try:
        with open(os.path.join(bundle_dir, "history.json"), "r", encoding="utf-8") as f:
            history = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: bad encoding or bad JSON.
        return {}
    # Callers index the history by key and call .values(); anything that is
    # not a dict is treated like an absent history.
    return history if isinstance(history, dict) else {}
H
Hsury 已提交
112

H
Hsury 已提交
113
def read_in_chunk(file_name, chunk_size=16 * 1024 * 1024, chunk_number=-1):
    """Yield the contents of ``file_name`` as consecutive byte chunks.

    Args:
        file_name: path of the file, opened in binary mode.
        chunk_size: maximum number of bytes per yielded chunk.
        chunk_number: maximum number of chunks to yield; ``-1`` means no limit.

    The generator stops at end of file or once ``chunk_number`` chunks have
    been yielded, whichever comes first.
    """
    chunk_counter = 0
    with open(file_name, "rb") as f:
        # Check the limit *before* reading so no extra chunk is read from disk
        # only to be thrown away once the requested number has been yielded.
        while chunk_number == -1 or chunk_counter < chunk_number:
            data = f.read(chunk_size)
            if not data:
                return
            yield data
            chunk_counter += 1

def history_handle(args):
    """Print every entry of the local upload history, or a notice if it is empty.

    ``args`` is accepted for the sub-command handler signature and is not read.
    """
    history = read_history()
    if not history:
        print("暂无历史记录")
        return
    for index, meta_dict in enumerate(history.values()):
        prefix = f"[{index}]"
        # First line: summary of the entry; second line: its link, indented
        # by the width of the index prefix.
        print(f"{prefix} {meta_dict['filename']} ({meta_dict['size'] / 1024 / 1024:.2f} MB), 共有{len(meta_dict['block'])}个分块, 上传于{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta_dict['time']))}")
        print(f"{' ' * len(prefix)} {meta_string(meta_dict['url'])}")
H
Hsury 已提交
133 134

def info_handle(args):
    """Fetch the metadata referenced by ``args.meta`` and print its fields.

    Prints a failure notice instead when the metadata cannot be fetched or parsed.
    """
    meta_dict = fetch_meta(args.meta)
    if not meta_dict:
        print("元数据解析失败")
        return
    uploaded_at = time.localtime(meta_dict['time'])
    print(f"文件名: {meta_dict['filename']}")
    print(f"大小: {meta_dict['size'] / 1024 / 1024:.2f} MB")
    print(f"SHA-1: {meta_dict['sha1']}")
    print(f"上传时间: {time.strftime('%Y-%m-%d %H:%M:%S', uploaded_at)}")
    print(f"分块数: {len(meta_dict['block'])}")
    # One line per block: index, size in MB and URL.
    for index, block_dict in enumerate(meta_dict['block']):
        print(f"分块{index} ({block_dict['size'] / 1024 / 1024:.2f} MB) URL: {block_dict['url']}")
H
Hsury 已提交
146

H
Hsury 已提交
147 148 149 150
def login_handle(args):
    """Log in with ``args.username`` / ``args.password`` and save the cookies.

    On a successful login the cookies returned by the ``Bilibili`` client are
    written as JSON to ``cookies.json`` in ``bundle_dir``; otherwise nothing
    is written.
    """
    bilibili = Bilibili()
    if not bilibili.login(username=args.username, password=args.password):
        return
    bilibili.get_user_info()
    cookies_path = os.path.join(bundle_dir, "cookies.json")
    with open(cookies_path, "w", encoding="utf-8") as f:
        serialized = json.dumps(bilibili.get_cookies(), ensure_ascii=False, indent=2)
        f.write(serialized)

H
Hsury 已提交
154
def upload_handle(args):
    """Upload ``args.file`` as BMP-wrapped blocks plus a metadata image.

    Reads ``args.file`` (path), ``args.block_size`` (block size in MB) and
    ``args.thread`` (number of concurrent upload threads).

    Returns the URL of the uploaded metadata image, the previously recorded
    URL when the local history already lists this file, or ``None`` on failure.
    """
    def core(index, block):
        """Worker: upload one block (or reuse a copy already on the server)."""
        try:
            block_sha1 = calc_sha1(block, hexdigest=True)
            full_block = bmp_header(block) + block
            full_block_sha1 = calc_sha1(full_block, hexdigest=True)
            url = is_skippable(full_block_sha1)
            if url:
                # The server already has this exact image; no upload needed.
                block_dict[index] = {
                    'url': url,
                    'size': len(block),
                    'sha1': block_sha1,
                }
                return
            for attempt in range(10):
                if terminate_flag.is_set():
                    return
                response = image_upload(full_block, cookies)
                if response:
                    if response['code'] == 0:
                        url = response['data']['image_url']
                        log(f"分块{index} ({len(block) / 1024 / 1024:.2f} MB) 上传完毕")
                        block_dict[index] = {
                            'url': url,
                            'size': len(block),
                            'sha1': block_sha1,
                        }
                        return
                    elif response['code'] == -4:
                        # Treated as "login required": abort the whole upload.
                        terminate_flag.set()
                        log(f"分块{index} ({len(block) / 1024 / 1024:.2f} MB) 第{attempt + 1}次上传失败, 请重新登录")
                        return
                log(f"分块{index} ({len(block) / 1024 / 1024:.2f} MB) 第{attempt + 1}次上传失败")
            # All attempts failed: stop the remaining work.
            terminate_flag.set()
        except Exception:
            terminate_flag.set()
            traceback.print_exc()
        finally:
            # Always free a slot so the dispatcher loop cannot block forever.
            done_flag.release()

    def is_skippable(sha1):
        """Return the default URL for ``sha1`` if a HEAD request answers 200, else None."""
        url = default_url(sha1)
        headers = {
            'Referer': "http://t.bilibili.com/",
            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36",
        }
        for _ in range(5):
            try:
                response = requests.head(url, headers=headers, timeout=10)
                return url if response.status_code == 200 else None
            except Exception:
                # Request failed; try again (up to 5 times in total).
                continue
        return None

    def write_history(first_4mb_sha1, meta_dict, url):
        """Store ``meta_dict`` plus its ``url`` in history.json under ``first_4mb_sha1``."""
        history = read_history()
        history[first_4mb_sha1] = dict(meta_dict, url=url)
        with open(os.path.join(bundle_dir, "history.json"), "w", encoding="utf-8") as f:
            f.write(json.dumps(history, ensure_ascii=False, indent=2))

    start_time = time.time()
    file_name = args.file
    if not os.path.exists(file_name):
        log(f"{file_name}不存在")
        return None
    if os.path.isdir(file_name):
        log("不支持上传文件夹")
        return None
    file_size = os.path.getsize(file_name)
    log(f"上传: {os.path.basename(file_name)} ({file_size / 1024 / 1024:.2f} MB)")
    first_4mb_sha1 = calc_sha1(read_in_chunk(file_name, chunk_size=4 * 1024 * 1024, chunk_number=1), hexdigest=True)
    history = read_history()
    previous = history.get(first_4mb_sha1)
    # The history key only covers the first 4 MB, so also compare the recorded
    # size: a different file that merely shares its first 4 MB must not be
    # reported as "already uploaded". Entries without a size are still trusted.
    if previous is not None and previous.get('size', file_size) == file_size:
        url = previous['url']
        log(f"该文件已于{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(previous['time']))}上传, 共有{len(previous['block'])}个分块")
        log(meta_string(url))
        return url
    try:
        with open(os.path.join(bundle_dir, "cookies.json"), "r", encoding="utf-8") as f:
            cookies = json.load(f)
    except (OSError, ValueError):
        log("Cookies加载失败, 请先登录")
        return None
    # A thread count below 1 would make the dispatcher wait on the semaphore
    # before any worker exists, i.e. hang forever.
    thread_limit = max(1, args.thread)
    log(f"线程数: {thread_limit}")
    done_flag = threading.Semaphore(0)
    terminate_flag = threading.Event()
    thread_pool = []
    block_dict = {}
    for index, block in enumerate(read_in_chunk(file_name, chunk_size=args.block_size * 1024 * 1024)):
        # Once the pool is full, wait for one worker to finish per new block.
        if len(thread_pool) >= thread_limit:
            done_flag.acquire()
        if not terminate_flag.is_set():
            thread_pool.append(threading.Thread(target=core, args=(index, block)))
            thread_pool[-1].start()
        else:
            log("已终止上传, 等待线程回收")
            break
    for thread in thread_pool:
        thread.join()
    if terminate_flag.is_set():
        return None
    sha1 = calc_sha1(read_in_chunk(file_name), hexdigest=True)
    meta_dict = {
        'time': int(time.time()),
        'filename': os.path.basename(file_name),
        'size': os.path.getsize(file_name),
        'sha1': sha1,
        'block': [block_dict[i] for i in range(len(block_dict))],
    }
    meta = json.dumps(meta_dict, ensure_ascii=False).encode("utf-8")
    full_meta = bmp_header(meta) + meta
    for attempt in range(10):
        response = image_upload(full_meta, cookies)
        if response and response['code'] == 0:
            url = response['data']['image_url']
            log("元数据上传完毕")
            log(f"{os.path.basename(file_name)}上传完毕, 共有{len(meta_dict['block'])}个分块, 用时{int(time.time() - start_time)}秒, 平均速度{meta_dict['size'] / 1024 / 1024 / (time.time() - start_time):.2f} MB/s")
            log(meta_string(url))
            write_history(first_4mb_sha1, meta_dict, url)
            return url
        log(f"元数据第{attempt + 1}次上传失败")
    return None
H
Hsury 已提交
280 281

def download_handle(args):
    """Download the file described by the metadata referenced in ``args.meta``.

    Reads ``args.meta`` (metadata link), ``args.file`` (optional output name),
    ``args.force`` (overwrite without asking) and ``args.thread`` (number of
    concurrent download threads).

    Returns the output file name when the file is present and its SHA-1
    matches the metadata, otherwise ``None``.
    """
    def core(index, block_dict):
        """Worker: download one block, verify its SHA-1 and write it in place."""
        try:
            for attempt in range(10):
                if terminate_flag.is_set():
                    return
                block = image_download(block_dict['url'])
                if block:
                    # Drop the 62-byte BMP header to get the payload back.
                    block = block[62:]
                    if calc_sha1(block, hexdigest=True) == block_dict['sha1']:
                        # "with" releases the lock even if seek/write raises,
                        # so a failing write cannot deadlock the other workers.
                        with file_lock:
                            f.seek(block_offset(index))
                            f.write(block)
                        log(f"分块{index} ({block_dict['size'] / 1024 / 1024:.2f} MB) 下载完毕")
                        return
                    else:
                        log(f"分块{index} ({block_dict['size'] / 1024 / 1024:.2f} MB) 校验未通过")
                else:
                    log(f"分块{index} ({block_dict['size'] / 1024 / 1024:.2f} MB) 第{attempt + 1}次下载失败")
            # All attempts failed: stop the remaining work.
            terminate_flag.set()
        except Exception:
            terminate_flag.set()
            traceback.print_exc()
        finally:
            # Always free a slot so the dispatcher loop cannot block forever.
            done_flag.release()

    def block_offset(index):
        """Return the byte offset of block ``index`` within the output file."""
        return sum(block['size'] for block in meta_dict['block'][:index])

    def is_overwritable(file_name):
        """Return True if the existing file may be overwritten (forced or confirmed)."""
        if args.force:
            return True
        else:
            return (input(f"{os.path.basename(file_name)}已存在于本地, 是否覆盖? [y/N] ") in ["y", "Y"])

    start_time = time.time()
    meta_dict = fetch_meta(args.meta)
    if not meta_dict:
        log("元数据解析失败")
        return None
    # The file name inside the metadata comes from the network; keep only its
    # base name so a crafted value such as "../../x" cannot escape the
    # current directory. An explicit args.file is used as given.
    file_name = args.file if args.file else os.path.basename(meta_dict['filename'])
    log(f"下载: {os.path.basename(file_name)} ({meta_dict['size'] / 1024 / 1024:.2f} MB), 共有{len(meta_dict['block'])}个分块, 上传于{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta_dict['time']))}")
    # A thread count below 1 would make the dispatcher wait on the semaphore
    # before any worker exists, i.e. hang forever.
    thread_limit = max(1, args.thread)
    log(f"线程数: {thread_limit}")
    download_block_list = []
    if os.path.exists(file_name):
        if os.path.getsize(file_name) == meta_dict['size'] and calc_sha1(read_in_chunk(file_name), hexdigest=True) == meta_dict['sha1']:
            log(f"{os.path.basename(file_name)}已存在于本地, 且与服务器端文件内容一致")
            return file_name
        elif is_overwritable(file_name):
            # Only blocks whose local bytes do not match need to be fetched again.
            with open(file_name, "rb") as f:
                for index, block_dict in enumerate(meta_dict['block']):
                    f.seek(block_offset(index))
                    if calc_sha1(f.read(block_dict['size']), hexdigest=True) != block_dict['sha1']:
                        download_block_list.append(index)
            log(f"{len(download_block_list)}个分块待下载")
        else:
            return None
    else:
        download_block_list = list(range(len(meta_dict['block'])))
    done_flag = threading.Semaphore(0)
    terminate_flag = threading.Event()
    file_lock = threading.Lock()
    thread_pool = []
    with open(file_name, "r+b" if os.path.exists(file_name) else "wb") as f:
        for index in download_block_list:
            # Once the pool is full, wait for one worker to finish per new block.
            if len(thread_pool) >= thread_limit:
                done_flag.acquire()
            if not terminate_flag.is_set():
                thread_pool.append(threading.Thread(target=core, args=(index, meta_dict['block'][index])))
                thread_pool[-1].start()
            else:
                log("已终止下载, 等待线程回收")
                break
        for thread in thread_pool:
            thread.join()
        if terminate_flag.is_set():
            return None
        # Cut off any leftover bytes of a longer pre-existing file.
        f.truncate(sum(block['size'] for block in meta_dict['block']))
    log(f"{os.path.basename(file_name)}下载完毕, 用时{int(time.time() - start_time)}秒, 平均速度{meta_dict['size'] / 1024 / 1024 / (time.time() - start_time):.2f} MB/s")
    sha1 = calc_sha1(read_in_chunk(file_name), hexdigest=True)
    if sha1 == meta_dict['sha1']:
        log(f"{os.path.basename(file_name)}校验通过")
        return file_name
    else:
        log(f"{os.path.basename(file_name)}校验未通过")
        return None
H
Hsury 已提交
375 376

if __name__ == "__main__":
    # Ctrl-C kills the process immediately (signal 9) instead of raising
    # KeyboardInterrupt in the main thread.
    signal.signal(signal.SIGINT, lambda signum, frame: os.kill(os.getpid(), 9))
    parser = argparse.ArgumentParser(description="BiliDrive", epilog="By Hsury, 2019/11/30")
    subparsers = parser.add_subparsers()
    history_parser = subparsers.add_parser("history", help="view upload history")
    history_parser.set_defaults(func=history_handle)
    info_parser = subparsers.add_parser("info", help="view meta info")
    info_parser.add_argument("meta", help="meta url")
    info_parser.set_defaults(func=info_handle)
    login_parser = subparsers.add_parser("login", help="log in to bilibili")
    login_parser.add_argument("username", help="username")
    login_parser.add_argument("password", help="password")
    login_parser.set_defaults(func=login_handle)
    upload_parser = subparsers.add_parser("upload", help="upload a file")
    upload_parser.add_argument("file", help="name of the file to upload")
    upload_parser.add_argument("-b", "--block-size", default=4, type=int, help="block size in MB")
    upload_parser.add_argument("-t", "--thread", default=4, type=int, help="upload thread number")
    upload_parser.set_defaults(func=upload_handle)
    download_parser = subparsers.add_parser("download", help="download a file")
    download_parser.add_argument("meta", help="meta url")
    download_parser.add_argument("file", nargs="?", default="", help="new file name")
    download_parser.add_argument("-f", "--force", action="store_true", help="force to overwrite if file exists")
    download_parser.add_argument("-t", "--thread", default=8, type=int, help="download thread number")
    download_parser.set_defaults(func=download_handle)
    # Without a sub-command on the command line, fall into an interactive
    # shell that accepts the same sub-commands plus "help" and "exit".
    shell = False
    while True:
        if shell:
            try:
                line = input("BiliDrive > ")
            except EOFError:
                # End of input (e.g. Ctrl-D or a closed pipe) ends the shell.
                break
            try:
                args = shlex.split(line)
            except ValueError as error:
                # Malformed quoting; report it and prompt again.
                print(error)
                continue
            if args == ["exit"]:
                break
            elif args == ["help"]:
                parser.print_help()
            else:
                try:
                    args = parser.parse_args(args)
                except SystemExit:
                    # argparse exits on bad input or -h; stay in the shell.
                    continue
                func = getattr(args, "func", None)
                if func is None:
                    # Empty line or no sub-command: nothing to run.
                    continue
                try:
                    func(args)
                except Exception:
                    # Keep the shell alive, but show what went wrong.
                    traceback.print_exc()
        else:
            args = parser.parse_args()
            # Check for a missing sub-command explicitly, so an AttributeError
            # raised inside a handler is not mistaken for "no sub-command".
            func = getattr(args, "func", None)
            if func is None:
                shell = True
            else:
                func(args)
                break