import base64
import contextlib
import http.cookiejar
import http.cookies
import json
import os
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import time
from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from hashlib import pbkdf2_hmac

from .aes import (
    aes_cbc_decrypt_bytes,
    aes_gcm_decrypt_and_verify_bytes,
    unpad_pkcs7,
)
from .dependencies import (
    _SECRETSTORAGE_UNAVAILABLE_REASON,
    secretstorage,
    sqlite3,
)
from .minicurses import MultilinePrinter, QuietMultilinePrinter
from .utils import (
    Popen,
    YoutubeDLCookieJar,
    error_to_str,
    expand_path,
    is_path_like,
    try_call,
)

# Browser names that extract_cookies_from_browser routes to _extract_chrome_cookies
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# Every browser name accepted by extract_cookies_from_browser
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}


class YDLLogger:
    """Logging adapter that forwards messages to a YoutubeDL-like object.

    When constructed without a ``ydl`` object every logging method is a
    silent no-op and ``progress_bar()`` returns None.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        if self._ydl:
            self._ydl.report_error(message)

    class ProgressBar(MultilinePrinter):
        """Single-line progress printer that drops updates arriving too quickly."""
        # Minimum number of seconds between two printed updates, and the
        # time of the last printed update (0 means "never printed")
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when no progress should be shown.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return
        file = self._ydl._out_files.error
        try:
            if not file.isatty():
                return
        except Exception:
            # A stream that cannot answer isatty() is treated as "not a terminal".
            # Only ordinary errors are swallowed so that KeyboardInterrupt and
            # SystemExit still propagate.
            return
        return self.ProgressBar(file, preserve_output=False)


def _create_progress_bar(logger):
    """Return the logger's progress printer, or a silent stand-in.

    If ``logger`` has a ``progress_bar`` method and it returns something
    truthy, that object is used. Otherwise a QuietMultilinePrinter whose
    ``print`` discards its argument is returned.
    """
    bar = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not bar:
        bar = QuietMultilinePrinter()
        bar.print = lambda _: None
    return bar


def load_cookies(cookie_file, browser_specification, ydl):
    """Collect cookies from a browser and/or a cookie file into one jar.

    cookie_file:            None, a path-like value, or another object that
                            YoutubeDLCookieJar accepts. Path-like values are
                            expanded with expand_path and only loaded when readable;
                            anything else is always loaded.
    browser_specification:  None, or a sequence that is unpacked into
                            _parse_browser_specification.
    ydl:                    object handed to YDLLogger for reporting.

    Returns the result of _merge_cookie_jars over the jars that were built
    (browser jar first, then the file jar).
    """
    cookie_jars = []
    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        cookie_jars.append(
            extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))

    if cookie_file is not None:
        is_filename = is_path_like(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        jar = YoutubeDLCookieJar(cookie_file)
        # An unreadable or missing file still yields an (empty) jar bound to that path
        if not is_filename or os.access(cookie_file, os.R_OK):
            jar.load(ignore_discard=True, ignore_expires=True)
        cookie_jars.append(jar)

    return _merge_cookie_jars(cookie_jars)


def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch cookie extraction to the implementation for ``browser_name``.

    browser_name:   'firefox', 'safari' or a member of CHROMIUM_BASED_BROWSERS.
    profile:        passed through to the browser-specific extractor.
    logger:         object with the YDLLogger interface.
    keyring:        only forwarded to the Chromium extractor.
    container:      only forwarded to the Firefox extractor.

    Raises ValueError for any other browser name.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    elif browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    elif browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    else:
        raise ValueError(f'unknown browser: {browser_name}')


def _extract_firefox_cookies(profile, container, logger):
    """Read cookies from a Firefox ``cookies.sqlite`` database.

    profile:    None (search the default Firefox directory), a path
                (as decided by _is_path), or a name joined onto the default directory.
    container:  None for all cookies, 'none' for cookies outside any container,
                otherwise a container name looked up in ``containers.json``.
                For entries identified by an l10n ID such as
                "userContextPersonal.label", both that full ID and its label
                part ("Personal") are accepted.
    logger:     object with the YDLLogger interface.

    Returns a YoutubeDLCookieJar (empty when sqlite3 is unavailable).
    Raises FileNotFoundError when the database or containers.json cannot be
    found/read, and ValueError when the container is not listed.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        # JSON is UTF-8; do not depend on the locale's default encoding
        with open(containers_path, encoding='utf-8') as containers:
            identities = json.load(containers).get('identities', [])
        label_re = r'userContext([^\.]+)\.label'
        container_id = next((context.get('userContextId') for context in identities if container in (
            context.get('name'),
            # full l10n ID, e.g. "userContextPersonal.label" (kept for backward compatibility)
            try_call(lambda: re.fullmatch(label_re, context['l10nID']).group()),
            # captured label part of the l10n ID, e.g. "Personal"
            try_call(lambda: re.fullmatch(label_re, context['l10nID']).group(1)),
        )), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # Work on a copy of the database placed in the temporary directory
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                # Match the ID both as the last attribute and when followed by further attributes
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # Single quotes: double-quoted string literals are only a legacy SQLite quirk
                cursor.execute(
                    "SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,'userContextId=')")
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()


def _firefox_browser_dir():
    """Return the platform-specific directory that is searched for Firefox profiles."""
    if sys.platform in ('cygwin', 'win32'):
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    elif sys.platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    # every other platform uses the dot-directory in the user's home
    return os.path.expanduser('~/.mozilla/firefox')


def _get_chromium_based_browser_settings(browser_name):
    """Return per-browser settings for a Chromium-based browser.

    browser_name must be one of the keys of the tables below; any other name
    raises KeyError.

    Returns a dict with:
        'browser_dir'        - user data directory for the current platform
        'keyring_name'       - name used when looking up the browser's keyring entry
        'supports_profiles'  - False for browsers listed in browsers_without_profiles
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    if sys.platform in ('cygwin', 'win32'):
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        browser_dir = {
            'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'),
            'chromium': os.path.join(appdata_local, R'Chromium\User Data'),
            'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
            'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
        }[browser_name]

    elif sys.platform == 'darwin':
        appdata = os.path.expanduser('~/Library/Application Support')
        browser_dir = {
            'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(appdata, 'Google/Chrome'),
            'chromium': os.path.join(appdata, 'Chromium'),
            'edge': os.path.join(appdata, 'Microsoft Edge'),
            'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
            'vivaldi': os.path.join(appdata, 'Vivaldi'),
        }[browser_name]

    else:
        config = _config_home()
        browser_dir = {
            'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(config, 'google-chrome'),
            'chromium': os.path.join(config, 'chromium'),
            'edge': os.path.join(config, 'microsoft-edge'),
            'opera': os.path.join(config, 'opera'),
            'vivaldi': os.path.join(config, 'vivaldi'),
        }[browser_name]

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
        'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
        'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles
    }


def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt cookies from a Chromium-based browser's ``Cookies`` database.

    browser_name:   key understood by _get_chromium_based_browser_settings.
    profile:        None (search the browser directory), a path
                    (as decided by _is_path), or a profile name.
    keyring:        forwarded to get_cookie_decryptor.
    logger:         object with the YDLLogger interface.

    Returns a YoutubeDLCookieJar (empty when sqlite3 is unavailable). Cookies
    whose value cannot be decrypted are skipped and counted in the log message.
    Raises FileNotFoundError when no cookie database is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # The browser directory is handed to the decryptor below: for browsers with
        # profiles it is the parent of the given profile path, otherwise the path itself
        config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
    else:
        if config['supports_profiles']:
            search_root = os.path.join(config['browser_dir'], profile)
        else:
            logger.error(f'{browser_name} does not support profiles')
            search_root = config['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # Work on a copy of the database placed in the temporary directory
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Text columns are fetched as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            # The "secure" flag column exists under two different names
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, line in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *line)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    elif not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            if failed_cookies > 0:
                failed_message = f' ({failed_cookies} could not be decrypted)'
            else:
                failed_message = ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            # Copy so that adding the 'unencrypted' count does not modify the decryptor's counters
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()


def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium ``cookies`` table into a cookie object.

    host_key, name, value and path are bytes and are decoded here. When the
    plain value is empty and an encrypted value is present, the value is
    obtained from ``decryptor.decrypt``.

    Returns ``(is_encrypted, cookie)``. ``is_encrypted`` is only meant to be
    used for its truthiness (it is False or the encrypted bytes). ``cookie``
    is an http.cookiejar.Cookie, or None when decryption returned None.
    """
    host_key = host_key.decode()
    name = name.decode()
    value = value.decode()
    path = path.decode()
    is_encrypted = not value and encrypted_value

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    return is_encrypted, http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})


class ChromeCookieDecryptor:
    """
    Base class for the platform-specific Chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version counters; each subclass replaces this with its own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one encrypted cookie value; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')


def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the ChromeCookieDecryptor subclass for the current platform.

    browser_root is only used on Windows, browser_keyring_name only on macOS
    and Linux, and keyring only on Linux (every platform that is neither
    macOS nor Windows is treated as Linux).
    """
    if sys.platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    elif sys.platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)


class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookie values carrying the ``v10`` or ``v11`` prefix."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 uses a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # v11 uses a key derived from the keyring password; None when no password was obtained
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = None if password is None else self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None when it cannot be decrypted.

        The first three bytes select the scheme; every call increments the
        matching entry of ``_cookie_counts``.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        elif version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        else:
            # unknown prefix: counted, but not decryptable
            self._cookie_counts['other'] += 1
            return None


class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookie values carrying the ``v10`` prefix, with a pass-through for anything else."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        # The key is derived from the keyring password; None when no password was obtained
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = None if password is None else self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None when no key is available.

        Values without the ``v10`` prefix are returned unchanged (still as the
        bytes that were passed in). Every call increments the matching entry
        of ``_cookie_counts``.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value


class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for ``v10`` (AES-GCM) cookie values, with a DPAPI fallback for anything else."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        # May be None; v10 cookies then cannot be decrypted
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None when it cannot be decrypted.

        Every call increments the matching entry of ``_cookie_counts``.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Payload layout after the prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): the DPAPI helper is defined outside this view; if it signals
            # failure with None, report failure as None here as well instead of raising
            # AttributeError on None.decode() - confirm against the helper
            return None if plaintext is None else plaintext.decode()


def _extract_safari_cookies(profile, logger):
    """Read cookies from Safari's ``Cookies.binarycookies`` file.

    profile:    must be None; any other value is reported through logger.error
                and otherwise ignored.
    logger:     object with the YDLLogger interface.

    Returns the jar produced by parse_safari_cookies.
    Raises ValueError on any platform other than macOS and FileNotFoundError
    when the file exists in neither of the two known locations.
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')

    if not os.path.isfile(cookies_path):
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as f:
        cookies_data = f.read()

    jar = parse_safari_cookies(cookies_data, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar


# Raised by DataParser for invalid reads/skips, truncated input and unexpected byte values
class ParserError(Exception):
    pass


class DataParser:
    """Sequential reader over an in-memory bytes buffer.

    ``cursor`` is the offset of the next unread byte. Every read advances it;
    reading past the end of the data raises ParserError.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError unless they match."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless ``big_endian``)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read up to the next NUL byte, consume the NUL and return the decoded text.

        Raises ParserError when no NUL byte follows the cursor; a
        UnicodeDecodeError from decoding propagates with the cursor already
        placed after the NUL.
        """
        # One scan for the terminator instead of one read_bytes() call per byte
        terminator = self._data.find(b'\x00', self.cursor)
        if terminator < 0:
            # Same end state as reading byte-by-byte until the data runs out
            self.cursor = max(self.cursor, len(self._data))
            raise ParserError('reached end of input')
        raw = self._data[self.cursor:terminator]
        self.cursor = terminator + 1
        return raw.decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by ``num_bytes``, logging the skipped bytes at debug level.

        Skipping zero bytes does nothing; a negative count raises ParserError.
        """
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Skip forward to the absolute ``offset`` (moving backwards raises ParserError)."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)


def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    reference = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = reference + timedelta(seconds=timestamp)
    return int(moment.timestamp())


def _parse_safari_cookies_header(data, logger):
    """Parse the file header of a Safari cookies database.

    The header is the signature ``b'cook'``, a big-endian page count and one
    big-endian size per page.

    Returns ``(page_sizes, offset)`` where ``offset`` is the position of the
    first byte after the header.
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor


def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari cookies database and add its cookies to ``jar``.

    A page is the signature ``b'\\x00\\x00\\x01\\x00'``, a little-endian cookie
    count, one little-endian record offset per cookie, and the records
    themselves at those offsets (relative to the start of the page).
    """
    p = DataParser(data, logger)
    p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = p.read_uint()
    record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    p.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for i, record_offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
            p.skip_to(record_offset, 'space between records')
            record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
            # The record was parsed from its own slice; move the page cursor past it
            p.read_bytes(record_length)
    p.skip_to_end('space in between pages')


def _parse_safari_cookies_record(data, jar, logger):
    """Parse one cookie record at the start of ``data`` and add it to ``jar``.

    All integers in the record are little-endian; the string offsets are
    relative to the start of the record.

    Returns the record size read from the record header. A record whose
    strings are not valid UTF-8 is skipped with a warning, but its size is
    still returned.
    """
    p = DataParser(data, logger)
    record_size = p.read_uint()
    p.skip(4, 'unknown record field 1')
    flags = p.read_uint()
    is_secure = bool(flags & 0x0001)
    p.skip(4, 'unknown record field 2')
    domain_offset = p.read_uint()
    name_offset = p.read_uint()
    path_offset = p.read_uint()
    value_offset = p.read_uint()
    p.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(p.read_double())
    # The creation date is unused, but it must be read to advance the cursor
    _creation_date = _mac_absolute_time_to_posix(p.read_double())  # noqa: F841

    try:
        p.skip_to(domain_offset)
        domain = p.read_cstring()

        p.skip_to(name_offset)
        name = p.read_cstring()

        p.skip_to(path_offset)
        path = p.read_cstring()

        p.skip_to(value_offset)
        value = p.read_cstring()
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    p.skip_to(record_size, 'space at the end of the record')

    cookie = http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={})
    jar.set_cookie(cookie)
    return record_size


def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the raw bytes of a Safari cookies database into a cookie jar.

    Cookies are added to ``jar`` when one is given, otherwise to a new
    YoutubeDLCookieJar; the jar is returned.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure are the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar


class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments told apart by _get_linux_desktop_environment.

    Mirrors ``DesktopEnvironment`` from
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    """
    # OTHER is the result when no known environment is detected
    OTHER = auto()
    CINNAMON = auto()
    GNOME = auto()
    KDE = auto()
    PANTHEON = auto()
    UNITY = auto()
    XFCE = auto()


class _LinuxKeyring(Enum):
    """
    Keyring backends that _choose_linux_keyring can select.

    Mirrors ``SelectedLinuxBackend`` from
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    """
    KWALLET = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()


# Names of all _LinuxKeyring members (a keys view, in definition order)
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()


def _get_linux_desktop_environment(env):
    """
    Detect the desktop environment from a mapping of environment variables.

    Lookup order: XDG_CURRENT_DESKTOP (only its first ':'-separated entry),
    then DESKTOP_SESSION, then GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION.
    Only the first of these that is set is consulted: an unrecognised value
    yields OTHER without trying the later variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)
    if xdg_current_desktop is not None:
        xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()

        if xdg_current_desktop == 'Unity':
            if desktop_session is not None and 'gnome-fallback' in desktop_session:
                return _LinuxDesktopEnvironment.GNOME
            else:
                return _LinuxDesktopEnvironment.UNITY
        elif xdg_current_desktop == 'GNOME':
            return _LinuxDesktopEnvironment.GNOME
        elif xdg_current_desktop == 'X-Cinnamon':
            return _LinuxDesktopEnvironment.CINNAMON
        elif xdg_current_desktop == 'KDE':
            return _LinuxDesktopEnvironment.KDE
        elif xdg_current_desktop == 'Pantheon':
            return _LinuxDesktopEnvironment.PANTHEON
        elif xdg_current_desktop == 'XFCE':
            return _LinuxDesktopEnvironment.XFCE
    elif desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        elif 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE
    else:
        if 'GNOME_DESKTOP_SESSION_ID' in env:
            return _LinuxDesktopEnvironment.GNOME
        elif 'KDE_FULL_SESSION' in env:
            return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER


def _choose_linux_keyring(logger):
    """
    Select the keyring backend that corresponds to the detected desktop environment.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend

    KDE maps to KWALLET, OTHER maps to BASICTEXT and every remaining
    environment maps to GNOMEKEYRING.
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')

    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        return _LinuxKeyring.KWALLET
    if desktop_environment == _LinuxDesktopEnvironment.OTHER:
        return _LinuxKeyring.BASICTEXT
    return _LinuxKeyring.GNOMEKEYRING


def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet

    Returns the stripped output of the dbus call, or 'kdewallet' when the call
    exits with a non-zero status or raises.
    """
    default_wallet = 'kdewallet'
    try:
        stdout, _, returncode = Popen.run([
            'dbus-send', '--session', '--print-reply=literal',
            '--dest=org.kde.kwalletd5',
            '/modules/kwalletd5',
            'org.kde.KWallet.networkWallet'
        ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        # Best effort: any failure falls back to the default wallet name
        logger.warning(f'exception while obtaining NetworkWallet: {error_to_str(e)}')
        return default_wallet


def _get_kwallet_password(browser_keyring_name, logger):
    """
    Read the "<browser_keyring_name> Safe Storage" password from KWallet using kwallet-query.

    Returns the password as bytes with trailing newlines removed. Returns b''
    when kwallet-query is not installed, exits with a non-zero status, reports
    that the entry could not be read, or raises.
    """
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this may be a bug as the intended behaviour is to generate a random password and store
            # it, but that doesn't matter here.
            return b''

        logger.debug('password found')
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''


def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Look up the "<browser_keyring_name> Safe Storage" secret in the default secretstorage collection.

    Returns the secret of the first item whose label matches, or b'' when
    secretstorage is unavailable or no item matches.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        for entry in collection.get_all_items():
            if entry.get_label() == f'{browser_keyring_name} Safe Storage':
                return entry.get_secret()
        # Reached only when no label matched
        logger.error('failed to read from keyring')
        return b''


def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Fetch the browser's storage password from the selected Linux keyring.

    `keyring` is the name of a _LinuxKeyring member; a falsy value selects the
    backend automatically via _choose_linux_keyring. Returns whatever the
    backend helper returns for KWALLET and GNOMEKEYRING, and None for
    BASICTEXT. Raises KeyError for a name that is not a _LinuxKeyring member.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # Raised explicitly (not via `assert`) so the check survives `python -O`
    raise AssertionError(f'Unknown keyring {keyring}')


def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the "<browser_keyring_name> Safe Storage" password with `security find-generic-password`.

    Returns the command's stdout as bytes with trailing newlines removed, or
    None when the command exits with a non-zero status or raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        stdout, _, returncode = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not returncode:
            return stdout.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None


def _get_windows_v10_key(browser_root, logger):
    """
    Load and decrypt the cookie encryption key stored in the browser's "Local State" file.

    The most recently modified "Local State" file below `browser_root` is
    parsed as JSON; its os_crypt.encrypted_key value is base64-decoded, the
    b'DPAPI' prefix is removed and the remainder is passed to
    _decrypt_windows_dpapi. Returns None when the file is not found, the key
    is missing or the prefix is absent.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as f:
        local_state = json.load(f)
    try:
        base64_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    encrypted_key = base64.b64decode(base64_key)
    dpapi_prefix = b'DPAPI'
    if encrypted_key.startswith(dpapi_prefix):
        return _decrypt_windows_dpapi(encrypted_key[len(dpapi_prefix):], logger)
    logger.error('invalid key')
    return None


def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive `key_length` bytes from `password` and `salt` using PBKDF2 with HMAC-SHA1."""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key


def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, strip the PKCS#7 padding and decode the result as text.

    The default initialization vector is 16 space characters. Returns the
    decoded string, or None (after a one-time warning) when the plaintext
    cannot be decoded.
    """
    padded = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    plaintext = unpad_pkcs7(padded)
    try:
        decoded = plaintext.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return decoded


def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and authenticate `ciphertext` with AES-GCM, then decode the plaintext as text.

    Returns the decoded string, or None (after a one-time warning) when
    aes_gcm_decrypt_and_verify_bytes raises ValueError or the plaintext cannot
    be decoded.
    """
    # The two failure modes stay in separate try blocks: UnicodeDecodeError is a
    # subclass of ValueError, so merging them would blur which warning is emitted.
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        decoded = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return decoded


def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32 through ctypes.

    Returns the decrypted bytes, or None (after a one-time warning) when the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    # Imported locally; ctypes.windll is only accessed when this function runs
    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # Copy the output out before handing its buffer to LocalFree
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext


def _config_home():
    """
    Return the user's configuration directory.

    Uses $XDG_CONFIG_HOME when it is set to a non-empty value, otherwise the
    expansion of '~/.config'. An empty value is treated like an unset one, as
    the XDG Base Directory Specification requires.
    """
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')


def _open_database_copy(database_path, tmpdir):
    """
    Copy the database to 'temporary.sqlite' inside `tmpdir` and return a cursor on the copy.

    The connection is not closed here; it stays reachable through the
    returned cursor's `connection` attribute.
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()


def _get_column_names(cursor, table_name):
    """
    Return the column names of `table_name` as a list of str.

    Names are read from `PRAGMA table_info`. They are decoded when the
    connection yields bytes (text_factory = bytes) and passed through
    unchanged when it yields str. `table_name` is interpolated directly into
    the statement, so it must be a trusted identifier.
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # The second field of each table_info row is the column name
    names = (row[1] for row in table_info)
    return [name.decode() if isinstance(name, bytes) else name for name in names]


962
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk `root` and return the path of the file named `filename` with the newest mtime.

    Progress is reported through the progress bar created for `logger`.
    Returns None when no file with that name exists below `root`.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched, candidates = 0, []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, file_names in os.walk(root):
            for name in file_names:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if name == filename:
                    candidates.append(os.path.join(directory, name))
    if not candidates:
        return None
    return max(candidates, key=lambda candidate: os.lstat(candidate).st_mtime)


def _merge_cookie_jars(jars):
    """
    Combine several cookie jars into one new YoutubeDLCookieJar.

    Cookies are added in iteration order. The merged jar takes the filename of
    the last input jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        source_filename = source_jar.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged


def _is_path(value):
    """Return True when `value` contains the platform's path separator."""
    separator = os.path.sep
    return separator in value


989
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and normalise its profile.

    Raises ValueError when `browser_name` is not in SUPPORTED_BROWSERS or when
    `keyring` is neither None nor in SUPPORTED_KEYRINGS. A profile whose
    expanded form contains a path separator is replaced by that expanded form;
    otherwise it is returned unchanged.
    Returns the tuple (browser_name, profile, keyring, container).
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring is not None and keyring not in SUPPORTED_KEYRINGS:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        if _is_path(expanded_profile):
            profile = expanded_profile
    return browser_name, profile, keyring, container


class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie

    When given a string, `load` skips over malformed cookies instead of
    aborting, so the well-formed cookies around them are still parsed.
    """
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    _LEGAL_KEY_CHARS = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + r"\[\]"

    # Lower-cased names that are treated as attributes of the preceding cookie
    _RESERVED = {
        "expires",
        "path",
        "comment",
        "domain",
        "max-age",
        "secure",
        "httponly",
        "version",
        "samesite",
    }

    # Attributes that may appear without a value
    _FLAGS = {"secure", "httponly"}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """Parse cookies from `data`, skipping anything that is malformed.

        Non-string input is delegated to SimpleCookie.load unchanged. For
        strings, a reserved attribute name is attached to the preceding cookie,
        and a `$` prefix on the name is ignored for this purpose. A
        `$`-prefixed name that is not reserved, a key without a value, or an
        invalid value ends the current cookie without raising.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        morsel = None
        for match in self._COOKIE_PATTERN.finditer(data):
            if match.group("bad"):
                morsel = None
                continue

            key, value = match.group("key", "val")

            # "$Name" marks an attribute; strip the prefix and handle it through
            # the reserved-attribute path so its real value is kept
            is_attribute = False
            if key.startswith("$"):
                key = key[1:]
                is_attribute = True

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                if morsel is None:
                    continue

                if value is None:
                    if lower_key not in self._FLAGS:
                        morsel = None
                        continue
                    value = True
                else:
                    value, _ = self.value_decode(value)

                morsel[key] = value

            elif is_attribute:
                # Unknown "$" attribute: Morsel would raise CookieError for it,
                # so end the current cookie instead of failing the whole parse
                morsel = None

            elif value is not None:
                morsel = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                morsel.set(key, real_value, coded_value)
                self[key] = morsel

            else:
                morsel = None