__main__.py 17.5 KB
Newer Older
H
Hsury 已提交
1 2 3 4 5 6 7 8
#!/usr/bin/env python3.7
# -*- coding: utf-8 -*-

import argparse
import hashlib
import json
import math
import os
H
Hsury 已提交
9
import re
H
Hsury 已提交
10
import requests
H
Hsury 已提交
11
import shlex
H
Hsury 已提交
12
import signal
H
Hsury 已提交
13
import struct
H
Hsury 已提交
14
import sys
H
Hsury 已提交
15
import threading
H
Hsury 已提交
16
import time
H
Hsury 已提交
17
import traceback
H
Hsury 已提交
18
import types
W
wizardforcel 已提交
19 20 21
from BiliDriveEx import __version__
from BiliDriveEx.bilibili import Bilibili
from BiliDriveEx.encoder import Encoder
H
Hsury 已提交
22 23

# Module-wide shorthand for the logging callable exposed on the Bilibili class;
# every handler in this file reports progress through it.
log = Bilibili._log
W
wizardforcel 已提交
24
# Shared Encoder instance; its encode()/decode() methods wrap and unwrap every
# block and metadata payload handled in this file.
encoder = Encoder()
H
Hsury 已提交
25

H
Hsury 已提交
26 27
# Directory where cookies.json and history.json are stored: next to the
# executable when sys.frozen is truthy, otherwise next to this source file.
bundle_dir = os.path.dirname(sys.executable) if getattr(sys, "frozen", False) else os.path.dirname(os.path.abspath(__file__))

W
wizardforcel 已提交
28 29
def default_url(sha1):
    """Return the album image URL built from the SHA-1 hex digest *sha1*."""
    return f"http://i0.hdslb.com/bfs/album/{sha1}.png"

def meta_string(url):
    """Return the short ``bdex://<sha1>`` form of *url* when it is an album PNG URL.

    Any other string is returned unchanged.
    """
    # The dots are escaped so only the literal host name and ".png" suffix are
    # accepted; unescaped they would match any character (e.g. "i0xhdslb.com").
    matched = re.match(r"^http(s?)://i0\.hdslb\.com/bfs/album/([a-fA-F0-9]{40})\.png$", url)
    if matched:
        return "bdex://" + matched.group(2)
    return url

def size_string(byte):
    """Format a byte count as a human-readable string (B, KB, MB or GB).

    A unit is used only when the value is strictly greater than its threshold,
    so exactly 1024 bytes is rendered as ``"1024 B"``.
    """
    if byte > 1024 * 1024 * 1024:
        return f"{byte / 1024 / 1024 / 1024:.2f} GB"
    if byte > 1024 * 1024:
        return f"{byte / 1024 / 1024:.2f} MB"
    if byte > 1024:
        return f"{byte / 1024:.2f} KB"
    return f"{int(byte)} B"
H
Hsury 已提交
31

H
Hsury 已提交
32 33 34 35 36 37 38 39 40
def calc_sha1(data, hexdigest=False):
    """Compute the SHA-1 digest of *data*.

    *data* is either a single bytes-like object or a generator that yields
    bytes-like chunks, which are hashed in order. The result is the hex string
    when *hexdigest* is true and the raw digest bytes otherwise.
    """
    hasher = hashlib.sha1()
    # A lone buffer is treated as a one-element sequence of chunks.
    pieces = data if isinstance(data, types.GeneratorType) else (data,)
    for piece in pieces:
        hasher.update(piece)
    if hexdigest:
        return hasher.hexdigest()
    return hasher.digest()

W
wizardforcel 已提交
41 42 43 44 45 46 47 48 49
def fetch_meta(s):
    """Download and decode the metadata referenced by *s*.

    *s* may be a ``bdex://<sha1>`` string, a ``bdrive://<sha1>`` string
    (fetched from the same URL with ``png`` replaced by ``x-ms-bmp``), or a
    plain ``http://`` / ``https://`` URL.

    Returns:
        The decoded metadata dictionary, or ``None`` when *s* has an
        unsupported form, the download fails, or the payload cannot be
        decoded as JSON.
    """
    if re.match(r"^bdex://[a-fA-F0-9]{40}$", s):
        full_meta = image_download(default_url(re.findall(r"[a-fA-F0-9]{40}", s)[0]))
    elif re.match(r"^bdrive://[a-fA-F0-9]{40}$", s):
        full_meta = image_download(
            default_url(re.findall(r"[a-fA-F0-9]{40}", s)[0]).replace('png', 'x-ms-bmp')
        )
    elif s.startswith(("http://", "https://")):
        full_meta = image_download(s)
    else:
        return None
    # A failed download is reported explicitly instead of relying on the
    # decoder raising when handed None.
    if full_meta is None:
        return None
    try:
        return json.loads(encoder.decode(full_meta).decode("utf-8"))
    except Exception:
        # Malformed payloads are expected here; unlike a bare ``except`` this
        # still lets KeyboardInterrupt and SystemExit propagate.
        return None

H
Hsury 已提交
58
def image_upload(data, cookies):
    """POST *data* to the drawImage upload endpoint as a ``.png`` file.

    Args:
        data: The encoded payload to send as the ``file_up`` form file.
        cookies: Cookies sent with the request.

    Returns:
        The parsed JSON response, or ``None`` when the request fails or the
        response body is not valid JSON.
    """
    url = "https://api.vc.bilibili.com/api/v1/drawImage/upload"
    headers = {
        'Origin': "https://t.bilibili.com",
        'Referer': "https://t.bilibili.com/",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36",
    }
    files = {
        # The file name is the current time in milliseconds.
        'file_up': (f"{int(time.time() * 1000)}.png", data),
    }
    # Kept in its own name so the ``data`` parameter is not rebound mid-function.
    form_fields = {
        'biz': "draw",
        'category': "daily",
    }
    try:
        response = requests.post(url, data=form_fields, headers=headers, cookies=cookies, files=files, timeout=300).json()
    except Exception:
        # Best effort: callers retry on None. KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        response = None
    # NOTE(review): the raw response is echoed to stdout; this looks like
    # leftover debugging output -- confirm before removing.
    print(response)
    return response

H
Hsury 已提交
79
def image_download(url):
    """Download *url* and return its body as bytes.

    The body is streamed in 128 KiB chunks. The download is abandoned when
    more than 5 seconds elapse between two consecutive chunks.

    Returns:
        The downloaded bytes, or ``None`` on a stall or any request error.
    """
    headers = {
        'Referer': "http://t.bilibili.com/",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36",
    }
    content = []
    last_chunk_time = None
    try:
        # The context manager closes the streamed response on every exit
        # path, including the early return on a stall.
        with requests.get(url, headers=headers, timeout=10, stream=True) as response:
            for chunk in response.iter_content(128 * 1024):
                if last_chunk_time is not None and time.time() - last_chunk_time > 5:
                    return None
                content.append(chunk)
                last_chunk_time = time.time()
        return b"".join(content)
    except Exception:
        # Best effort: callers retry on None. KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        return None
H
Hsury 已提交
95

H
Hsury 已提交
96
def read_history():
    """Load the upload history stored in ``history.json`` under ``bundle_dir``.

    Returns:
        The history dictionary, or an empty dictionary when the file is
        missing, unreadable, not valid JSON, or does not contain a JSON object.
    """
    try:
        with open(os.path.join(bundle_dir, "history.json"), "r", encoding="utf-8") as f:
            history = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both JSON and UTF-8 decoding errors.
        history = {}
    # Callers index and update the result as a mapping, so any other JSON
    # value is treated as "no history".
    if not isinstance(history, dict):
        history = {}
    return history
H
Hsury 已提交
103

H
Hsury 已提交
104
def read_in_chunk(file_name, chunk_size=16 * 1024 * 1024, chunk_number=-1):
    """Yield the contents of *file_name* in chunks of at most *chunk_size* bytes.

    Args:
        file_name: Path of the file, opened in binary mode.
        chunk_size: Maximum number of bytes per yielded chunk.
        chunk_number: Maximum number of chunks to yield; ``-1`` means no limit.

    Yields:
        Non-empty ``bytes`` objects in file order.
    """
    chunk_counter = 0
    with open(file_name, "rb") as f:
        # The limit is checked before reading, so no extra chunk is pulled
        # from disk once ``chunk_number`` chunks have been produced.
        while chunk_number == -1 or chunk_counter < chunk_number:
            data = f.read(chunk_size)
            if data == b"":
                return
            yield data
            chunk_counter += 1

def login_handle(args):
    """Log in with ``args.username`` / ``args.password`` and save the cookies.

    On a successful login the user info is fetched and the session cookies are
    written as JSON to ``cookies.json`` under ``bundle_dir``. Nothing is
    written when the login fails.
    """
    client = Bilibili()
    logged_in = client.login(username=args.username, password=args.password)
    if not logged_in:
        return
    client.get_user_info()
    cookie_path = os.path.join(bundle_dir, "cookies.json")
    with open(cookie_path, "w", encoding="utf-8") as cookie_file:
        serialized = json.dumps(client.get_cookies(), ensure_ascii=False, indent=2)
        cookie_file.write(serialized)

H
Hsury 已提交
122
def upload_handle(args):
    """Upload ``args.file`` block by block and publish a metadata image for it.

    The file is read in blocks of ``args.block_size`` MiB. Each block is
    encoded with the shared encoder and uploaded by its own worker thread;
    once ``args.thread`` workers have been started, a new one is started only
    after a running one signals completion. When every block has been
    uploaded, a JSON metadata document (time, file name, size, SHA-1 and the
    per-block url/size/sha1 list) is encoded and uploaded as well, and the
    result is recorded in ``history.json``.

    Returns:
        The URL of the metadata image, or ``None`` when the file is missing or
        a directory, cookies cannot be loaded, the arguments are invalid, or
        any upload ultimately fails. If the SHA-1 of the first 4 MiB is
        already present in the history, the stored URL is returned without
        uploading anything.
    """
    def core(index, block):
        """Upload one block, retrying up to 10 times; record it in block_dict."""
        try:
            block_sha1 = calc_sha1(block, hexdigest=True)
            full_block = encoder.encode(block)
            full_block_sha1 = calc_sha1(full_block, hexdigest=True)
            url = is_skippable(full_block_sha1)
            if url:
                # The encoded block is already reachable at its default URL.
                log(f"分块{index + 1}/{block_num}上传完毕")
                block_dict[index] = {
                    'url': url,
                    'size': len(block),
                    'sha1': block_sha1,
                }
            else:
                for attempt in range(10):
                    if terminate_flag.is_set():
                        return
                    response = image_upload(full_block, cookies)
                    if response:
                        if response['code'] == 0:
                            url = response['data']['image_url']
                            log(f"分块{index + 1}/{block_num}上传完毕")
                            block_dict[index] = {
                                'url': url,
                                'size': len(block),
                                'sha1': block_sha1,
                            }
                            return
                        elif response['code'] == -4:
                            # Code -4 is reported to the user as "please log
                            # in again"; retrying cannot help, so stop all.
                            terminate_flag.set()
                            log(f"分块{index + 1}/{block_num}第{attempt + 1}次上传失败, 请重新登录")
                            return
                    # "第" separates the block total from the attempt number;
                    # without it "1/5" + attempt 5 read as "1/55".
                    log(f"分块{index + 1}/{block_num}第{attempt + 1}次上传失败")
                else:
                    # All 10 attempts failed.
                    terminate_flag.set()
        except Exception:
            terminate_flag.set()
            traceback.print_exc()
        finally:
            # Always let the scheduling loop know this worker is finished.
            done_flag.release()

    def is_skippable(sha1):
        """Return the default URL for *sha1* if a HEAD request answers 200, else None."""
        url = default_url(sha1)
        headers = {
            'Referer': "http://t.bilibili.com/",
            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36",
        }
        for _ in range(5):
            try:
                response = requests.head(url, headers=headers, timeout=10)
                return url if response.status_code == 200 else None
            except Exception:
                # Request failed; try again (up to 5 times in total).
                pass
        return None

    def write_history(first_4mb_sha1, meta_dict, url):
        """Store *meta_dict* (plus its *url*) in history.json under *first_4mb_sha1*."""
        history = read_history()
        history[first_4mb_sha1] = meta_dict
        history[first_4mb_sha1]['url'] = url
        with open(os.path.join(bundle_dir, "history.json"), "w", encoding="utf-8") as f:
            f.write(json.dumps(history, ensure_ascii=False, indent=2))

    start_time = time.time()
    file_name = args.file
    if not os.path.exists(file_name):
        log(f"文件{file_name}不存在")
        return None
    if os.path.isdir(file_name):
        log("暂不支持上传文件夹")
        return None
    log(f"上传: {os.path.basename(file_name)} ({size_string(os.path.getsize(file_name))})")
    # The history is keyed by the SHA-1 of the first 4 MiB of the file.
    first_4mb_sha1 = calc_sha1(read_in_chunk(file_name, chunk_size=4 * 1024 * 1024, chunk_number=1), hexdigest=True)
    history = read_history()
    if first_4mb_sha1 in history:
        url = history[first_4mb_sha1]['url']
        log(f"文件已于{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(history[first_4mb_sha1]['time']))}上传, 共有{len(history[first_4mb_sha1]['block'])}个分块")
        log(f"META URL -> {meta_string(url)}")
        return url
    try:
        with open(os.path.join(bundle_dir, "cookies.json"), "r", encoding="utf-8") as f:
            cookies = json.loads(f.read())
    except (OSError, ValueError):
        log("Cookies加载失败, 请先登录")
        return None
    # A non-positive thread count would block forever on the semaphore below,
    # and a non-positive block size breaks the block count computation.
    if args.block_size <= 0 or args.thread <= 0:
        log("分块大小和线程数必须为正整数")
        return None
    log(f"线程数: {args.thread}")
    done_flag = threading.Semaphore(0)
    terminate_flag = threading.Event()
    thread_pool = []
    block_dict = {}
    block_bytes = args.block_size * 1024 * 1024
    block_num = math.ceil(os.path.getsize(file_name) / block_bytes)
    for index, block in enumerate(read_in_chunk(file_name, chunk_size=block_bytes)):
        # Once the pool is full, wait for one worker to finish before
        # starting the next.
        if len(thread_pool) >= args.thread:
            done_flag.acquire()
        if not terminate_flag.is_set():
            thread_pool.append(threading.Thread(target=core, args=(index, block)))
            thread_pool[-1].start()
        else:
            log("已终止上传, 等待线程回收")
            break
    for thread in thread_pool:
        thread.join()
    if terminate_flag.is_set():
        return None
    sha1 = calc_sha1(read_in_chunk(file_name), hexdigest=True)
    meta_dict = {
        'time': int(time.time()),
        'filename': os.path.basename(file_name),
        'size': os.path.getsize(file_name),
        'sha1': sha1,
        'block': [block_dict[i] for i in range(len(block_dict))],
    }
    meta = json.dumps(meta_dict, ensure_ascii=False).encode("utf-8")
    full_meta = encoder.encode(meta)
    for attempt in range(10):
        response = image_upload(full_meta, cookies)
        if response and response['code'] == 0:
            url = response['data']['image_url']
            log("元数据上传完毕")
            log(f"{meta_dict['filename']} ({size_string(meta_dict['size'])}) 上传完毕, 用时{time.time() - start_time:.1f}秒, 平均速度{size_string(meta_dict['size'] / (time.time() - start_time))}/s")
            log(f"META URL -> {meta_string(url)}")
            write_history(first_4mb_sha1, meta_dict, url)
            return url
        log(f"元数据第{attempt + 1}次上传失败")
    return None
H
Hsury 已提交
249 250

def download_handle(args):
    """Download the file described by the metadata at ``args.meta``.

    The metadata is fetched with ``fetch_meta``. Each listed block is
    downloaded by its own worker thread, decoded, checked against its SHA-1
    and written at its offset in the output file; once ``args.thread`` workers
    have been started, a new one is started only after a running one signals
    completion. If the target file already exists and differs from the
    metadata, the user is asked (unless ``args.force``) whether to overwrite
    it, and only blocks whose content does not match are downloaded again.

    Returns:
        The output file name when the final SHA-1 check passes (or the
        existing file already matches), otherwise ``None``.
    """
    def core(index, block_dict):
        """Download one block, retrying up to 10 times, and write it into the file."""
        try:
            for attempt in range(10):
                if terminate_flag.is_set():
                    return
                block = image_download(block_dict['url'])
                if block:
                    block = encoder.decode(block)
                    if calc_sha1(block, hexdigest=True) == block_dict['sha1']:
                        # ``with`` releases the lock even if the write fails.
                        with file_lock:
                            f.seek(block_offsets[index])
                            f.write(block)
                        log(f"分块{index + 1}/{len(meta_dict['block'])}下载完毕")
                        return
                    else:
                        log(f"分块{index + 1}/{len(meta_dict['block'])}校验未通过")
                else:
                    # "第" separates the block total from the attempt number;
                    # without it "1/5" + attempt 5 read as "1/55".
                    log(f"分块{index + 1}/{len(meta_dict['block'])}第{attempt + 1}次下载失败")
            else:
                # All 10 attempts failed.
                terminate_flag.set()
        except Exception:
            terminate_flag.set()
            traceback.print_exc()
        finally:
            # Always let the scheduling loop know this worker is finished.
            done_flag.release()

    def is_overwritable(file_name):
        """Return True when overwriting is forced or confirmed interactively."""
        if args.force:
            return True
        return input("文件已存在, 是否覆盖? [y/N] ") in ["y", "Y"]

    start_time = time.time()
    meta_dict = fetch_meta(args.meta)
    if not meta_dict:
        log("元数据解析失败")
        return None
    # The metadata comes from the network: keep only the final path component
    # of its file name so it cannot direct the write outside the current
    # directory. An explicit ``args.file`` from the user is used as given.
    file_name = args.file if args.file else os.path.basename(meta_dict['filename'])
    if not file_name:
        log("元数据解析失败")
        return None
    log(f"下载: {os.path.basename(file_name)} ({size_string(meta_dict['size'])}), 共有{len(meta_dict['block'])}个分块, 上传于{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta_dict['time']))}")
    # A non-positive thread count would block forever on the semaphore below.
    if args.thread <= 0:
        log("线程数必须为正整数")
        return None
    log(f"线程数: {args.thread}")
    # Byte offset of every block, computed once instead of re-summing the
    # preceding sizes for each block.
    block_offsets = []
    total_size = 0
    for entry in meta_dict['block']:
        block_offsets.append(total_size)
        total_size += entry['size']
    download_block_list = []
    if os.path.exists(file_name):
        if os.path.getsize(file_name) == meta_dict['size'] and calc_sha1(read_in_chunk(file_name), hexdigest=True) == meta_dict['sha1']:
            log("文件已存在, 且与服务器端内容一致")
            return file_name
        elif is_overwritable(file_name):
            # Keep blocks that are already correct on disk; queue the rest.
            with open(file_name, "rb") as existing:
                for index, block_dict in enumerate(meta_dict['block']):
                    existing.seek(block_offsets[index])
                    if calc_sha1(existing.read(block_dict['size']), hexdigest=True) != block_dict['sha1']:
                        download_block_list.append(index)
            log(f"{len(download_block_list)}/{len(meta_dict['block'])}个分块待下载")
        else:
            return None
    else:
        download_block_list = list(range(len(meta_dict['block'])))
    done_flag = threading.Semaphore(0)
    terminate_flag = threading.Event()
    file_lock = threading.Lock()
    thread_pool = []
    with open(file_name, "r+b" if os.path.exists(file_name) else "wb") as f:
        for index in download_block_list:
            # Once the pool is full, wait for one worker to finish before
            # starting the next.
            if len(thread_pool) >= args.thread:
                done_flag.acquire()
            if not terminate_flag.is_set():
                thread_pool.append(threading.Thread(target=core, args=(index, meta_dict['block'][index])))
                thread_pool[-1].start()
            else:
                log("已终止下载, 等待线程回收")
                break
        for thread in thread_pool:
            thread.join()
        if terminate_flag.is_set():
            return None
        # Drop any trailing bytes left over from a longer pre-existing file.
        f.truncate(total_size)
    log(f"{os.path.basename(file_name)} ({size_string(meta_dict['size'])}) 下载完毕, 用时{time.time() - start_time:.1f}秒, 平均速度{size_string(meta_dict['size'] / (time.time() - start_time))}/s")
    sha1 = calc_sha1(read_in_chunk(file_name), hexdigest=True)
    if sha1 == meta_dict['sha1']:
        log("文件校验通过")
        return file_name
    else:
        log("文件校验未通过")
        return None
H
Hsury 已提交
344

H
Hsury 已提交
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
def info_handle(args):
    """Print a summary of the metadata referenced by ``args.meta``.

    Shows the file name, size, SHA-1, upload time, block count and one line
    per block, or a failure message when the metadata cannot be fetched.
    """
    meta = fetch_meta(args.meta)
    if not meta:
        print("元数据解析失败")
        return
    print(f"文件名: {meta['filename']}")
    print(f"大小: {size_string(meta['size'])}")
    print(f"SHA-1: {meta['sha1']}")
    print(f"上传时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta['time']))}")
    print(f"分块数: {len(meta['block'])}")
    # Blocks are numbered from 1 in the output.
    for number, entry in enumerate(meta['block'], start=1):
        print(f"分块{number} ({size_string(entry['size'])}) URL: {entry['url']}")

def history_handle(args):
    """Print every entry of the upload history, or a notice when it is empty.

    Each entry takes two lines: a numbered summary, then its META URL
    indented to line up under the summary text.
    """
    records = read_history()
    if not records:
        print("暂无历史记录")
        return
    # Entries are numbered from 1 in the output.
    for number, entry in enumerate(records.values(), start=1):
        tag = f"[{number}]"
        padding = " " * len(tag)
        print(f"{tag} {entry['filename']} ({size_string(entry['size'])}), 共有{len(entry['block'])}个分块, 上传于{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(entry['time']))}")
        print(f"{padding} META URL -> {meta_string(entry['url'])}")

def main():
    """Command-line entry point.

    With a sub-command (login, upload, download, info, history) the matching
    handler runs once and the function returns. Without one, the help text is
    printed and an interactive ``BiliDrive > `` shell starts, which also
    accepts ``help``, ``version`` and ``exit``.
    """
    # Ctrl+C kills the whole process immediately with signal 9.
    signal.signal(signal.SIGINT, lambda signum, frame: os.kill(os.getpid(), 9))
    parser = argparse.ArgumentParser(prog="BiliDrive", description="Make Bilibili A Great Cloud Storage!", formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-v", "--version", action="version", version=f"BiliDrive version: {__version__}")
    subparsers = parser.add_subparsers()
    login_parser = subparsers.add_parser("login", help="log in to bilibili")
    login_parser.add_argument("username", help="your bilibili username")
    login_parser.add_argument("password", help="your bilibili password")
    login_parser.set_defaults(func=login_handle)
    upload_parser = subparsers.add_parser("upload", help="upload a file")
    upload_parser.add_argument("file", help="name of the file to upload")
    upload_parser.add_argument("-b", "--block-size", default=4, type=int, help="block size in MB")
    upload_parser.add_argument("-t", "--thread", default=4, type=int, help="upload thread number")
    upload_parser.set_defaults(func=upload_handle)
    download_parser = subparsers.add_parser("download", help="download a file")
    download_parser.add_argument("meta", help="meta url")
    download_parser.add_argument("file", nargs="?", default="", help="new file name")
    download_parser.add_argument("-f", "--force", action="store_true", help="force to overwrite if file exists")
    download_parser.add_argument("-t", "--thread", default=8, type=int, help="download thread number")
    download_parser.set_defaults(func=download_handle)
    info_parser = subparsers.add_parser("info", help="show meta info")
    info_parser.add_argument("meta", help="meta url")
    info_parser.set_defaults(func=info_handle)
    history_parser = subparsers.add_parser("history", help="show upload history")
    history_parser.set_defaults(func=history_handle)

    args = parser.parse_args()
    # ``func`` is only present when a sub-command was given. Testing for it
    # directly (rather than catching AttributeError around the call) keeps an
    # AttributeError raised inside a handler from being mistaken for
    # "no sub-command".
    if hasattr(args, "func"):
        args.func(args)
        return

    # No sub-command: register the shell-only commands and start the shell.
    subparsers.add_parser("help", help="show this help message").set_defaults(func=lambda _: parser.print_help())
    subparsers.add_parser("version", help="show program's version number").set_defaults(func=lambda _: parser.parse_args(["--version"]))
    subparsers.add_parser("exit", help="exit program").set_defaults(func=lambda _: os._exit(0))
    parser.print_help()
    while True:
        try:
            words = shlex.split(input("BiliDrive > "))
        except EOFError:
            # End of input (e.g. Ctrl+D or a closed pipe): leave the shell.
            print()
            return
        except ValueError as error:
            # shlex rejects unbalanced quotes; report it and keep the shell alive.
            print(f"BiliDrive: {error}")
            continue
        try:
            shell_args = parser.parse_args(words)
            if hasattr(shell_args, "func"):
                shell_args.func(shell_args)
        except SystemExit:
            # argparse exits after printing usage errors, --help or
            # --version; inside the shell that must not end the session.
            pass
        except Exception:
            # Show handler failures instead of silently discarding them.
            traceback.print_exc()

# Start the command-line interface only when this file is run as a script.
if __name__ == "__main__":
    main()