cookies.py 52.7 KB
Newer Older
1
import base64
2
import collections
3
import contextlib
4
import http.cookiejar
5
import http.cookies
6
import io
7 8
import json
import os
9
import re
10 11 12 13 14
import shutil
import struct
import subprocess
import sys
import tempfile
P
pukkandan 已提交
15
import time
16
import urllib.request
17
from datetime import datetime, timedelta, timezone
18
from enum import Enum, auto
19 20
from hashlib import pbkdf2_hmac

P
pukkandan 已提交
21 22 23 24 25
from .aes import (
    aes_cbc_decrypt_bytes,
    aes_gcm_decrypt_and_verify_bytes,
    unpad_pkcs7,
)
26
from .compat import functools
27 28 29 30 31
from .dependencies import (
    _SECRETSTORAGE_UNAVAILABLE_REASON,
    secretstorage,
    sqlite3,
)
32
from .minicurses import MultilinePrinter, QuietMultilinePrinter
P
pukkandan 已提交
33 34 35
from .utils import (
    Popen,
    error_to_str,
36
    escape_url,
P
pukkandan 已提交
37
    expand_path,
38
    is_path_like,
39 40
    sanitize_url,
    str_or_none,
P
pukkandan 已提交
41
    try_call,
42
    write_string,
P
pukkandan 已提交
43
)
44
from .utils._utils import _YDLLogger
45 46 47 48 49

# Browsers that extract_cookies_from_browser() routes to _extract_chrome_cookies()
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
# All of the above plus the browsers that have their own dedicated extractor
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}


50 51 52
class YDLLogger(_YDLLogger):
    """Logger used by the cookie extractors; optionally provides a terminal progress bar."""

    def warning(self, message, only_once=False):  # compat
        """Emit a warning, forwarding `only_once` as the parent's `once` argument."""
        return super().warning(message, once=only_once)

    class ProgressBar(MultilinePrinter):
        """Printer that writes a '[Cookies] ...' status on line 0, rate-limited by `_DELAY`."""

        # Minimum number of seconds between two updates, and the time of the last update
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            """Print `message` unless the previous update is less than `_DELAY` seconds old."""
            if time.time() - self._timer > self._DELAY:
                self.print_at_line(f'[Cookies] {message}', 0)
                self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when no progress bar should be shown.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return
        file = self._ydl._out_files.error
        try:
            if not file.isatty():
                return
        except Exception:
            # A stream that cannot answer isatty() is treated as "not a terminal".
            # Deliberately not BaseException, so KeyboardInterrupt/SystemExit still propagate
            return
        return self.ProgressBar(file, preserve_output=False)
74 75 76 77 78 79 80 81 82 83 84


def _create_progress_bar(logger):
    """Return the logger's progress bar, or a silent stand-in when it offers none.

    A logger without a `progress_bar` method, or whose method returns a falsy
    value, gets a QuietMultilinePrinter whose `print` discards its argument.
    """
    bar = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not bar:
        bar = QuietMultilinePrinter()
        bar.print = lambda _: None
    return bar

85 86 87 88

def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser specification and/or a cookie file.

    @param cookie_file            None, or the cookie source handed to YoutubeDLCookieJar.
                                  Path-like values are expanded with expand_path() first
    @param browser_specification  None, or the arguments for _parse_browser_specification()
    @param ydl                    Object wrapped in a YDLLogger for browser extraction
    @returns                      The result of _merge_cookie_jars() over the collected jars
    """
    cookie_jars = []
    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        cookie_jars.append(
            extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))

    if cookie_file is not None:
        is_filename = is_path_like(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        jar = YoutubeDLCookieJar(cookie_file)
        # An unreadable path still yields an (empty) jar bound to that path;
        # sources that are not path-like are always loaded
        if not is_filename or os.access(cookie_file, os.R_OK):
            jar.load()
        cookie_jars.append(jar)

    return _merge_cookie_jars(cookie_jars)


106
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the extractor matching `browser_name` and return its cookie jar.

    `container` is only used for firefox and `keyring` only for the Chromium
    based browsers; safari receives just the profile.

    Raises ValueError for a browser name that has no extractor.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    elif browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    elif browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    else:
        raise ValueError(f'unknown browser: {browser_name}')
115 116


117
def _extract_firefox_cookies(profile, container, logger):
    """Read the cookies of a Firefox profile into a YoutubeDLCookieJar.

    @param profile    None (search the default Firefox directory), a path, or a
                      profile name relative to the default Firefox directory
    @param container  None (all cookies), 'none' (only cookies that belong to no
                      container) or a container name looked up in containers.json
    @param logger     Logger used for info/debug/warning output and the progress bar
    @returns          The populated jar; an empty jar if sqlite3 is unavailable

    Raises FileNotFoundError if the cookie database or containers.json cannot be
    found/read, and ValueError if the requested container is not listed.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        # JSON is UTF-8; do not depend on the locale's default encoding
        with open(containers_path, encoding='utf-8') as containers:
            identities = json.load(containers).get('identities', [])
        container_id = next((context.get('userContextId') for context in identities if container in (
            context.get('name'),
            # The full l10nID ("userContext<Label>.label"), as matched historically ...
            try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()),
            # ... and the captured bare <Label>, so either spelling selects the container
            try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group(1)),
        )), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                # The ID may be the last origin attribute or be followed by further ones
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            # Close the connection before the temporary directory is removed
            if cursor is not None:
                cursor.connection.close()


def _firefox_browser_dir():
    """Return the platform-specific directory that is searched for Firefox profiles."""
    if sys.platform in ('cygwin', 'win32'):
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    elif sys.platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    # Every other platform uses the dot-directory in the user's home
    return os.path.expanduser('~/.mozilla/firefox')
191 192 193 194


def _get_chromium_based_browser_settings(browser_name):
    """Return the per-platform settings of a Chromium based browser.

    @returns  A dict with the keys 'browser_dir' (user data directory),
              'keyring_name' (name used for the OS keyring lookup) and
              'supports_profiles' (False only for opera)

    Raises KeyError if `browser_name` is not one of the known browsers.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    if sys.platform in ('cygwin', 'win32'):
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        browser_dir = {
            'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'),
            'chromium': os.path.join(appdata_local, R'Chromium\User Data'),
            'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
            # Opera is the only entry that lives under the roaming directory
            'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
        }[browser_name]

    elif sys.platform == 'darwin':
        appdata = os.path.expanduser('~/Library/Application Support')
        browser_dir = {
            'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(appdata, 'Google/Chrome'),
            'chromium': os.path.join(appdata, 'Chromium'),
            'edge': os.path.join(appdata, 'Microsoft Edge'),
            'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
            'vivaldi': os.path.join(appdata, 'Vivaldi'),
        }[browser_name]

    else:
        config = _config_home()
        browser_dir = {
            'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(config, 'google-chrome'),
            'chromium': os.path.join(config, 'chromium'),
            'edge': os.path.join(config, 'microsoft-edge'),
            'opera': os.path.join(config, 'opera'),
            'vivaldi': os.path.join(config, 'vivaldi'),
        }[browser_name]

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
        'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
        'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles,
    }


249
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the cookies of a Chromium based browser.

    @param browser_name  One of CHROMIUM_BASED_BROWSERS
    @param profile       None, a path, or a profile name inside the browser directory
    @param keyring       Passed through to get_cookie_decryptor()
    @param logger        Logger used for output and the progress bar
    @returns             A YoutubeDLCookieJar; empty if sqlite3 is unavailable

    Raises FileNotFoundError when no 'Cookies' database is found under the search root.
    Cookies that cannot be decrypted are skipped and only counted in the log output.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # For browsers with profiles, the given path is a profile inside the browser directory
        config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
    else:
        if config['supports_profiles']:
            search_root = os.path.join(config['browser_dir'], profile)
        else:
            # Reported, but extraction continues from the browser directory
            logger.error(f'{browser_name} does not support profiles')
            search_root = config['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Return text columns as bytes; _process_chrome_cookie() decodes them itself
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            # The column is named either 'is_secure' or 'secure' depending on the database
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, line in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *line)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    elif not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            if failed_cookies > 0:
                failed_message = f' ({failed_cookies} could not be decrypted)'
            else:
                failed_message = ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            # Copy so that adding the 'unencrypted' entry does not modify the decryptor's counters
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            # Close the connection before the temporary directory is removed
            if cursor is not None:
                cursor.connection.close()


315
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium `cookies` table into a cookie object.

    `host_key`, `name`, `value` and `path` are bytes and are decoded here;
    `encrypted_value` is only decrypted when the plain `value` is empty.

    @returns  (is_encrypted, cookie) where `is_encrypted` is a bool and `cookie`
              is an http.cookiejar.Cookie, or None if decryption failed
    """
    host_key = host_key.decode()
    name = name.decode()
    value = value.decode()
    path = path.decode()
    # Normalised to a real bool so the ciphertext bytes are not handed back as the flag
    is_encrypted = bool(not value and encrypted_value)

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    return is_encrypted, http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})


334 335 336 337 338 339 340
class ChromeCookieDecryptor:
    """
    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
                - also attempts empty password if decryption fails
            - v11: AES-CBC encrypted with an OS protected key (keyring)
                - also attempts empty password if decryption fails
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version counters; the platform subclasses assign their own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value, or None if it cannot be decrypted."""
        raise NotImplementedError('Must be implemented by sub classes')
366

367

368
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Return the ChromeCookieDecryptor implementation for the current platform.

    `browser_root` is only used on Windows, `browser_keyring_name` on macOS and
    Linux, and `keyring` only on Linux (every platform that is neither macOS
    nor Windows/cygwin).
    """
    if sys.platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    elif sys.platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
374 375 376


class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for 'v10' (fixed key) and 'v11' (keyring key) cookies on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # 'v10' cookies use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # Fallback key tried when the regular key fails to decrypt
        self._empty_key = self.derive_key(b'')
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring

    @functools.cached_property
    def _v11_key(self):
        """Key derived from the keyring password, or None when no password was obtained.

        Computed on first access, so the keyring is only queried once a 'v11'
        cookie is actually encountered.
        """
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        return None if password is None else self.derive_key(password)

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from `password` (PBKDF2-SHA1, 1 iteration)."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        # The first three bytes carry the version tag, the remainder is the ciphertext
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)

        elif version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)

        else:
            self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
            self._cookie_counts['other'] += 1
            return None


class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for 'v10' cookies on macOS; any other value is returned unchanged."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        # Without a keyring password there is no key and 'v10' cookies cannot be decrypted
        self._v10_key = None if password is None else self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key from `password` (PBKDF2-SHA1, 1003 iterations)."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a 'v10' value; return None if no key is available."""
        # The first three bytes carry the version tag, the remainder is the ciphertext
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)

        else:
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value


class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for 'v10' (AES-GCM) and legacy DPAPI-encrypted cookies on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Return the decrypted value, or None when it cannot be decrypted."""
        # The first three bytes carry the version tag, the remainder is the payload
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Payload layout: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): assumes the helper reports failure with None, like the other
            # decrypt paths; report the cookie as undecryptable instead of raising AttributeError
            return None if plaintext is None else plaintext.decode()
494 495 496 497


def _extract_safari_cookies(profile, logger):
    """Read Safari's binary cookie file and return the parsed cookie jar.

    @param profile  Optional path to a Cookies.binarycookies file; when falsy the
                    two default locations are tried in order
    @param logger   Logger used for debug/info output

    Raises ValueError on any platform other than macOS and FileNotFoundError
    when no cookie file exists.
    """
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    if profile:
        cookies_path = os.path.expanduser(profile)
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('custom safari cookies database not found')

    else:
        cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')

        if not os.path.isfile(cookies_path):
            logger.debug('Trying secondary cookie location')
            cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
            if not os.path.isfile(cookies_path):
                raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as f:
        cookies_data = f.read()

    jar = parse_safari_cookies(cookies_data, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar


class ParserError(Exception):
    """Raised by DataParser for invalid reads/skips, truncated input or unexpected bytes."""


class DataParser:
    """Sequential reader over a bytes-like buffer.

    `cursor` is the offset of the next unread byte. Every read advances it;
    reads that would run past the buffer raise ParserError.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next `num_bytes` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Consume len(expected_value) bytes; raise ParserError if they differ from it."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless `big_endian`)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless `big_endian`)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the decoded text before it.

        Raises ParserError if the buffer ends before a NUL byte is found, and
        UnicodeDecodeError if the bytes cannot be decoded.
        """
        # Pull single bytes until the NUL terminator, which is consumed but not returned
        return b''.join(iter(lambda: self.read_bytes(1), b'\x00')).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by `num_bytes`, logging the skipped bytes; negative counts raise ParserError."""
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Advance the cursor to the absolute `offset` (which must not lie behind it)."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Advance the cursor to the end of the buffer."""
        self.skip_to(len(self._data), description)


def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())


def _parse_safari_cookies_header(data, logger):
    """Parse the file header of a Safari cookie database.

    The header is the signature b'cook', a big-endian page count and one
    big-endian size per page.

    @returns  (page_sizes, offset) where `offset` is where the header ends
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor


def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a Safari cookie database and add its cookies to `jar`.

    A page is the signature b'\\x00\\x00\\x01\\x00', a little-endian cookie count,
    one little-endian record offset per cookie, and then the records themselves.
    """
    p = DataParser(data, logger)
    p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = p.read_uint()
    record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    p.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for i, record_offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
            p.skip_to(record_offset, 'space between records')
            # The record is parsed from its own slice; then move this parser past it
            record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
            p.read_bytes(record_length)
    p.skip_to_end('space in between pages')


def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of `data` and add the cookie to `jar`.

    @returns  The record size read from the record header, so the caller can
              step over the record. A record whose strings fail to decode is
              skipped with a warning and its size is still returned
    """
    p = DataParser(data, logger)
    record_size = p.read_uint()
    p.skip(4, 'unknown record field 1')
    flags = p.read_uint()
    # The lowest flag bit marks the cookie as secure
    is_secure = bool(flags & 0x0001)
    p.skip(4, 'unknown record field 2')
    # Offsets of the NUL-terminated strings, relative to the start of the record
    domain_offset = p.read_uint()
    name_offset = p.read_uint()
    path_offset = p.read_uint()
    value_offset = p.read_uint()
    p.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(p.read_double())
    _creation_date = _mac_absolute_time_to_posix(p.read_double())  # noqa: F841

    try:
        p.skip_to(domain_offset)
        domain = p.read_cstring()

        p.skip_to(name_offset)
        name = p.read_cstring()

        p.skip_to(path_offset)
        path = p.read_cstring()

        p.skip_to(value_offset)
        value = p.read_cstring()
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    p.skip_to(record_size, 'space at the end of the record')

    cookie = http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={})
    jar.set_cookie(cookie)
    return record_size


def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari cookie database and return the filled jar.

    A new YoutubeDLCookieJar is created when `jar` is None; otherwise the
    cookies are added to the given jar, which is also the return value.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    # Pages follow the header back to back; anything after the last page is the footer
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar


668 669 670 671 672 673 674
class _LinuxDesktopEnvironment(Enum):
    """
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    OTHER = auto()
    CINNAMON = auto()
    DEEPIN = auto()
    GNOME = auto()
    KDE3 = auto()
    KDE4 = auto()
    KDE5 = auto()
    KDE6 = auto()
    PANTHEON = auto()
    UKUI = auto()
    UNITY = auto()
    XFCE = auto()
    LXQT = auto()
686 687


688 689
class _LinuxKeyring(Enum):
    """
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()  # KDE4
    KWALLET5 = auto()
    KWALLET6 = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()
698 699 700 701 702


# Member names of _LinuxKeyring (a live keys view of the enum's __members__ mapping)
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()


703
def _get_linux_desktop_environment(env, logger):
    """
    Determine the desktop environment from environment variables.

    The variables are consulted in the same order as Chromium's GetDesktopEnvironment:
    XDG_CURRENT_DESKTOP, then DESKTOP_SESSION, then some legacy variables.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment

    @param env      mapping of environment variables, e.g. os.environ
    @param logger   logger used to report unrecognised values
    @returns        the detected _LinuxDesktopEnvironment, OTHER if nothing matched
    """
    # XDG_CURRENT_DESKTOP entries that map directly to a desktop environment
    xdg_desktops = {
        'Deepin': _LinuxDesktopEnvironment.DEEPIN,
        'GNOME': _LinuxDesktopEnvironment.GNOME,
        'X-Cinnamon': _LinuxDesktopEnvironment.CINNAMON,
        'Pantheon': _LinuxDesktopEnvironment.PANTHEON,
        'XFCE': _LinuxDesktopEnvironment.XFCE,
        'UKUI': _LinuxDesktopEnvironment.UKUI,
        'LXQt': _LinuxDesktopEnvironment.LXQT,
    }
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        # The variable can hold several colon separated entries in priority order
        # (e.g. "ubuntu:GNOME"), so every entry is checked, not only the first one
        for part in map(str.strip, xdg_current_desktop.split(':')):
            if part == 'Unity':
                if desktop_session is not None and 'gnome-fallback' in desktop_session:
                    return _LinuxDesktopEnvironment.GNOME
                return _LinuxDesktopEnvironment.UNITY
            elif part == 'KDE':
                kde_version = env.get('KDE_SESSION_VERSION', None)
                if kde_version == '5':
                    return _LinuxDesktopEnvironment.KDE5
                elif kde_version == '6':
                    return _LinuxDesktopEnvironment.KDE6
                elif kde_version == '4':
                    return _LinuxDesktopEnvironment.KDE4
                logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
                return _LinuxDesktopEnvironment.KDE4
            elif part in xdg_desktops:
                return xdg_desktops[part]
        logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')

    # An unrecognised XDG_CURRENT_DESKTOP must not prevent the older variables from being used
    if desktop_session is not None:
        if desktop_session == 'deepin':
            return _LinuxDesktopEnvironment.DEEPIN
        elif desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif desktop_session in ('kde4', 'kde-plasma'):
            return _LinuxDesktopEnvironment.KDE4
        elif desktop_session == 'kde':
            if 'KDE_SESSION_VERSION' in env:
                return _LinuxDesktopEnvironment.KDE4
            return _LinuxDesktopEnvironment.KDE3
        elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return _LinuxDesktopEnvironment.XFCE
        elif desktop_session == 'ukui':
            return _LinuxDesktopEnvironment.UKUI
        logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')

    # Fall back on some older environment variables
    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        if 'KDE_SESSION_VERSION' in env:
            return _LinuxDesktopEnvironment.KDE4
        return _LinuxDesktopEnvironment.KDE3
    return _LinuxDesktopEnvironment.OTHER
774 775 776 777


def _choose_linux_keyring(logger):
    """
    SelectBackend in [1]

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    @param logger   logger used for debug output and passed to the desktop environment detection
    @returns        the _LinuxKeyring matching the desktop environment detected from os.environ

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE4:
        linux_keyring = _LinuxKeyring.KWALLET
    elif desktop_environment == _LinuxDesktopEnvironment.KDE5:
        linux_keyring = _LinuxKeyring.KWALLET5
    elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
        linux_keyring = _LinuxKeyring.KWALLET6
    elif desktop_environment in (
        _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
    ):
        linux_keyring = _LinuxKeyring.BASICTEXT
    else:
        # every remaining desktop environment uses the GNOME keyring
        linux_keyring = _LinuxKeyring.GNOMEKEYRING
    return linux_keyring


805
def _get_kwallet_network_wallet(keyring, logger):
    """ The name of the wallet used to store network passwords.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet

    @param keyring  one of the KWALLET* members of _LinuxKeyring
    @param logger   logger used to report failures
    @returns        the wallet name reported by `dbus-send`, or 'kdewallet' if it cannot be obtained
    """
    default_wallet = 'kdewallet'
    try:
        if keyring == _LinuxKeyring.KWALLET:
            service_name = 'org.kde.kwalletd'
            wallet_path = '/modules/kwalletd'
        elif keyring == _LinuxKeyring.KWALLET5:
            service_name = 'org.kde.kwalletd5'
            wallet_path = '/modules/kwalletd5'
        elif keyring == _LinuxKeyring.KWALLET6:
            service_name = 'org.kde.kwalletd6'
            wallet_path = '/modules/kwalletd6'
        else:
            # caught below, so an unsupported keyring also results in the default wallet
            raise ValueError(keyring)

        stdout, _, returncode = Popen.run([
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet',
        ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {error_to_str(e)}')
        return default_wallet


846 847
def _get_kwallet_password(browser_keyring_name, keyring, logger):
    """
    Read the browser's "Safe Storage" password from KWallet using `kwallet-query`.

    @param browser_keyring_name  browser name as used in the keyring entries
                                 ("<name> Safe Storage" in the folder "<name> Keys")
    @param keyring               one of the KWALLET* members of _LinuxKeyring
    @param logger                logger used for diagnostics
    @returns                     the password as bytes; b'' if it could not be read
    """
    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')

    if shutil.which('kwallet-query') is None:
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(keyring, logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet,
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this was identified as a bug later and fixed in
            # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
            # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
            return b''

        logger.debug('password found')
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''


def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Read the browser's "Safe Storage" password from the default secretstorage collection.

    @param browser_keyring_name  browser name as used in the item label ("<name> Safe Storage")
    @param logger                logger used to report failures
    @returns                     the secret of the matching item; b'' if secretstorage is
                                 unavailable or no item has the expected label
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        col = secretstorage.get_default_collection(con)
        for item in col.get_all_items():
            if item.get_label() == f'{browser_keyring_name} Safe Storage':
                return item.get_secret()
        # only reached when none of the items matched
        logger.error('failed to read from keyring')
        return b''


def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Obtain the cookie encryption password from the selected Linux keyring.

    @param browser_keyring_name  browser name as used in the keyring entries
    @param keyring               name of a _LinuxKeyring member; when falsy the keyring is
                                 chosen automatically with _choose_linux_keyring
    @param logger                logger used for diagnostics
    @returns                     the password as returned by the keyring helper, or None
                                 for the BASICTEXT keyring
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # raised explicitly because an `assert` statement is removed when running with -O
    raise AssertionError(f'Unknown keyring {keyring}')


def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the browser's "Safe Storage" password with `security find-generic-password`.

    @param browser_keyring_name  used as the account name and, with the suffix
                                 " Safe Storage", as the service name
    @param logger                logger used for diagnostics
    @returns                     the password as bytes (trailing newlines removed);
                                 None if the command fails or cannot be run
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        stdout, _, returncode = Popen.run(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if returncode:
            logger.warning('find-generic-password failed')
            return None
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
943 944 945


def _get_windows_v10_key(browser_root, logger):
    """
    Read the encrypted key from the browser's `Local State` file and decrypt it with DPAPI.

    @param browser_root  directory that is searched for the `Local State` file
    @param logger        logger used for diagnostics
    @returns             the result of _decrypt_windows_dpapi for the stored key; None if the
                         file or the key is missing or the key lacks the DPAPI prefix

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{path}"')
    with open(path, encoding='utf8') as f:
        data = json.load(f)
    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        base64_key = data['os_crypt']['encrypted_key']
    except (KeyError, TypeError):
        # TypeError: the file content or its `os_crypt` entry is not a JSON object
        logger.error('no encrypted key in Local State')
        return None
    encrypted_key = base64.b64decode(base64_key)
    # kDPAPIKeyPrefix in [1]
    prefix = b'DPAPI'
    if not encrypted_key.startswith(prefix):
        logger.error('invalid key')
        return None
    return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger)


def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of `key_length` bytes from `password` and `salt` with PBKDF2-HMAC-SHA1"""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key


976 977 978 979 980 981 982 983 984
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, trying each of `keys` in order.

    @returns  the plaintext of the first key whose result decodes as text;
              None (after a warning) if no key produces decodable text
    """
    for candidate_key in keys:
        decrypted = aes_cbc_decrypt_bytes(ciphertext, candidate_key, initialization_vector)
        unpadded = unpad_pkcs7(decrypted)
        try:
            text = unpadded.decode()
        except UnicodeDecodeError:
            # most likely the wrong key; try the next one
            continue
        return text
    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
985 986 987 988


def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and verify `ciphertext` with AES-GCM and decode the result as text.

    @returns  the decoded plaintext; None (after a warning) if the verification
              raises ValueError or the plaintext cannot be decoded
    """
    try:
        plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        return plaintext.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None


def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` with the Windows function CryptUnprotectData.

    @param ciphertext  the encrypted bytes
    @param logger      logger used to report a failed decryption
    @returns           the decrypted bytes; None if CryptUnprotectData reports failure

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    # imported here instead of at module level; `ctypes.windll` is only used inside this function
    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
    blob_out = DATA_BLOB()
    ret = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out),  # pDataOut
    )
    if not ret:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the result before the output buffer is released
    result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return result


def _config_home():
    """Return $XDG_CONFIG_HOME if it is set, otherwise the expanded `~/.config`"""
    fallback = os.path.expanduser('~/.config')
    return os.environ.get('XDG_CONFIG_HOME', fallback)


def _open_database_copy(database_path, tmpdir):
    """
    Copy the database to `tmpdir` and return a cursor on the copy.

    The copy is stored as `temporary.sqlite` inside `tmpdir`.
    """
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()


def _get_column_names(cursor, table_name):
    """
    Return the names of the columns of `table_name` as a list of str.

    @param cursor      cursor of the database that contains the table
    @param table_name  name of the table; inserted into the statement as is
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # The name is the second field of each row. Whether text is returned as bytes or str
    # depends on the connection's `text_factory`, so both are accepted
    return [
        row[1].decode() if isinstance(row[1], bytes) else row[1]
        for row in table_info]
1050 1051


1052
def _find_most_recently_used_file(root, filename, logger):
    """
    Search `root` recursively for files called `filename`.

    @param root      directory to walk
    @param filename  exact name of the file to look for
    @param logger    logger used to create the progress bar
    @returns         the path of the match with the latest modification time,
                     or None if there is no match
    """
    # if there are multiple browser profiles, take the most recently used one
    i, paths = 0, []
    with _create_progress_bar(logger) as progress_bar:
        for curr_root, _, files in os.walk(root):
            for file in files:
                i += 1
                progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
                if file == filename:
                    paths.append(os.path.join(curr_root, file))
    return max(paths, key=lambda path: os.lstat(path).st_mtime, default=None)


def _merge_cookie_jars(jars):
    """
    Combine several cookie jars into a new YoutubeDLCookieJar.

    Cookies are added in the order of `jars`. The merged jar takes the filename
    of the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        source_filename = source_jar.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged


def _is_path(value):
    """Whether `value` contains the path separator of the current platform"""
    separator = os.path.sep
    return separator in value


1079
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification.

    @param browser_name  must be one of SUPPORTED_BROWSERS
    @param profile       profile name or path; replaced by its expanded form
                         if that form contains a path separator
    @param keyring       None or one of SUPPORTED_KEYRINGS
    @param container     returned unchanged
    @returns             the tuple (browser_name, profile, keyring, container)
    @raises ValueError   if the browser or the keyring is not supported
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')
    if profile is not None:
        expanded_profile = expand_path(profile)
        if _is_path(expanded_profile):
            profile = expanded_profile
    return browser_name, profile, keyring, container
1087 1088 1089 1090 1091


class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Keys that are attributes of the preceding cookie instead of cookies themselves
    _RESERVED = {
        "expires",
        "path",
        "comment",
        "domain",
        "max-age",
        "secure",
        "httponly",
        "version",
        "samesite",
    }

    # Reserved keys that are allowed to appear without a value
    _FLAGS = {"secure", "httponly"}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """
        Load cookies from `data`.

        A str is parsed entry by entry; entries that cannot be used are skipped
        and the valid ones are kept. Any other type is passed to the parent class.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # The cookie that following attributes belong to; None while there is
        # no valid cookie to attach them to
        morsel = None
        for match in self._COOKIE_PATTERN.finditer(data):
            if match.group('bad'):
                morsel = None
                continue

            key, value = match.group('key', 'val')

            is_attribute = False
            if key.startswith('$'):
                key = key[1:]
                is_attribute = True

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                if morsel is None:
                    continue

                if value is None:
                    if lower_key not in self._FLAGS:
                        morsel = None
                        continue
                    value = True
                else:
                    value, _ = self.value_decode(value)

                morsel[key] = value

            elif is_attribute:
                morsel = None

            elif value is not None:
                morsel = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                morsel.set(key, real_value, coded_value)
                self[key] = morsel

            else:
                morsel = None
1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215


class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    Cookie jar that is saved to and loaded from Netscape formatted cookie files.
    `filename` can be a path or an already opened file-like object (see `open`).

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        # The parent is initialised without a filename; it is stored below instead
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Return the cookie file representation ('TRUE'/'FALSE') of a condition"""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text stream for `file`.

        @param file   a path, which is opened (and closed on exit) with UTF-8 encoding,
                      or a file-like object, which is yielded as is and left open
        @param write  whether the stream is going to be written; existing content is discarded
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position, so without rewinding,
                # a stream that was used before would be padded up to its old offset
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard, ignore_expires):
        """Write one tab separated line per cookie to `f`"""
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Returns the line without the HttpOnly prefix; raises LoadError for invalid entries
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # The valid lines are collected in memory and then parsed by the parent class
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # The appended space avoids an IndexError for empty lines
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See  '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        return self._cookies_for_request(urllib.request.Request(escape_url(sanitize_url(url))))

    def clear(self, *args, **kwargs):
        """Same as the parent's `clear`, except that a KeyError for missing cookies is ignored"""
        with contextlib.suppress(KeyError):
            return super().clear(*args, **kwargs)