cookies.py 38.0 KB
Newer Older
1
import contextlib
2 3 4 5 6 7 8 9 10
import ctypes
import json
import os
import shutil
import struct
import subprocess
import sys
import tempfile
from datetime import datetime, timedelta, timezone
11
from enum import Enum, auto
12 13
from hashlib import pbkdf2_hmac

P
pukkandan 已提交
14 15 16 17 18
from .aes import (
    aes_cbc_decrypt_bytes,
    aes_gcm_decrypt_and_verify_bytes,
    unpad_pkcs7,
)
P
pukkandan 已提交
19
from .compat import compat_b64decode, compat_cookiejar_Cookie
20 21 22 23 24
from .dependencies import (
    _SECRETSTORAGE_UNAVAILABLE_REASON,
    secretstorage,
    sqlite3,
)
25
from .minicurses import MultilinePrinter, QuietMultilinePrinter
P
pukkandan 已提交
26
from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51

# Browser names that are routed to the Chromium extraction path.
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
}
# Every browser name this module knows how to extract cookies from.
SUPPORTED_BROWSERS = {*CHROMIUM_BASED_BROWSERS, 'firefox', 'safari'}


class YDLLogger:
    """Logger facade that forwards messages to a YoutubeDL-like object.

    Every method is a silent no-op when no ``ydl`` object was supplied, so
    the class can be used as a default logger.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        """Forward a debug message to ``ydl.write_debug``."""
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        """Print an informational message prefixed with ``[Cookies]``."""
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        """Forward a warning to ``ydl.report_warning``."""
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        """Forward an error to ``ydl.report_error``."""
        if self._ydl:
            self._ydl.report_error(message)

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        Returns None when no progress output should be shown.
        """
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
            return
        file = self._ydl._out_files['error']
        try:
            if not file.isatty():
                return
        except Exception:
            # Treat a stream that cannot answer isatty() as "not a terminal",
            # but let KeyboardInterrupt/SystemExit propagate.
            return

        printer = MultilinePrinter(file, preserve_output=False)
        printer.print = lambda message: printer.print_at_line(f'[Cookies] {message}', 0)
        return printer


def _create_progress_bar(logger):
    """Return the logger's progress printer, or a silent stand-in.

    The logger's ``progress_bar()`` is optional; when it is missing or
    returns a falsy value, a QuietMultilinePrinter whose ``print`` discards
    its argument is returned instead.
    """
    printer = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if not printer:
        printer = QuietMultilinePrinter()
        printer.print = lambda _: None
    return printer

78 79 80 81

def load_cookies(cookie_file, browser_specification, ydl):
    """Build a single cookie jar from a browser and/or a cookie file.

    @param cookie_file            Cookie file (path or other object accepted by
                                  YoutubeDLCookieJar), or None
    @param browser_specification  Arguments for _parse_browser_specification, or None
    @param ydl                    Object wrapped in a YDLLogger for reporting
    @returns                      The result of merging all jars that were loaded
    """
    cookie_jars = []
    if browser_specification is not None:
        browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
        cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring))

    if cookie_file is not None:
        is_filename = YoutubeDLCookieJar.is_path(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        jar = YoutubeDLCookieJar(cookie_file)
        # An unreadable path is skipped rather than loaded; the (empty) jar is still merged
        if not is_filename or os.access(cookie_file, os.R_OK):
            jar.load(ignore_discard=True, ignore_expires=True)
        cookie_jars.append(jar)

    return _merge_cookie_jars(cookie_jars)


98
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
    """Extract cookies from the named browser.

    @param browser_name  One of SUPPORTED_BROWSERS
    @param profile       Profile name or path, or None
    @param logger        Logger used for reporting
    @param keyring       Keyring selection; only passed on for Chromium based browsers
    @raises ValueError   If browser_name is not a known browser
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, logger)
    elif browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    elif browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    else:
        raise ValueError(f'unknown browser: {browser_name}')
107 108 109 110


def _extract_firefox_cookies(profile, logger):
    """Read all cookies from a firefox ``cookies.sqlite`` database.

    @param profile  None (search the firefox directory), a path, or a name
                    joined onto the firefox directory
    @param logger   Logger used for reporting
    @returns        A YoutubeDLCookieJar (empty when sqlite3 is unavailable)
    @raises FileNotFoundError  If no cookie database is found
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_root = _firefox_browser_dir()
    elif _is_path(profile):
        search_root = profile
    else:
        search_root = os.path.join(_firefox_browser_dir(), profile)

    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # The query runs against a copy of the database placed in tmpdir
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = compat_cookiejar_Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()


def _firefox_browser_dir():
    """Return the platform specific firefox directory to search for cookies.

    @raises ValueError  If sys.platform is not linux, win32 or darwin
    """
    if sys.platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    elif sys.platform == 'win32':
        return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
    elif sys.platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    else:
        raise ValueError(f'unsupported platform: {sys.platform}')
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179


def _get_chromium_based_browser_settings(browser_name):
    """Return platform specific settings for a Chromium based browser.

    @param browser_name  One of CHROMIUM_BASED_BROWSERS (other names raise KeyError)
    @returns             dict with the keys 'browser_dir', 'keyring_name'
                         and 'supports_profiles'
    @raises ValueError   If sys.platform is not linux, win32 or darwin
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    if sys.platform in ('linux', 'linux2'):
        config = _config_home()
        browser_dir = {
            'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(config, 'google-chrome'),
            'chromium': os.path.join(config, 'chromium'),
            'edge': os.path.join(config, 'microsoft-edge'),
            'opera': os.path.join(config, 'opera'),
            'vivaldi': os.path.join(config, 'vivaldi'),
        }[browser_name]

    elif sys.platform == 'win32':
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        browser_dir = {
            'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'),
            'chromium': os.path.join(appdata_local, R'Chromium\User Data'),
            'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
            'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
        }[browser_name]

    elif sys.platform == 'darwin':
        appdata = os.path.expanduser('~/Library/Application Support')
        browser_dir = {
            'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(appdata, 'Google/Chrome'),
            'chromium': os.path.join(appdata, 'Chromium'),
            'edge': os.path.join(appdata, 'Microsoft Edge'),
            'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
            'vivaldi': os.path.join(appdata, 'Vivaldi'),
        }[browser_name]

    else:
        raise ValueError(f'unsupported platform: {sys.platform}')

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
        'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
        'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles,
    }


222
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt all cookies of a Chromium based browser.

    @param browser_name  One of CHROMIUM_BASED_BROWSERS
    @param profile       None, a profile path, or a profile name
    @param keyring       Keyring selection passed on to get_cookie_decryptor
    @param logger        Logger used for reporting
    @returns             A YoutubeDLCookieJar (empty when sqlite3 is unavailable)
    @raises FileNotFoundError  If no cookie database is found
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # For browsers with profiles the browser directory is the parent of the profile path
        config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
    else:
        if config['supports_profiles']:
            search_root = os.path.join(config['browser_dir'], profile)
        else:
            logger.error(f'{browser_name} does not support profiles')
            search_root = config['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # Text columns are fetched as bytes; _process_chrome_cookie decodes them
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            # The secure flag column is called either 'is_secure' or 'secure'
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, line in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *line)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    elif not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor.cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()


288
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Convert one row of the Chromium ``cookies`` table into a cookie.

    host_key, name, value and path are expected as bytes and are decoded.
    The row counts as encrypted when ``value`` is empty and
    ``encrypted_value`` is not.

    @returns  (is_encrypted, cookie), where is_encrypted is a bool and cookie
              is None when the value could not be decrypted
    """
    host_key = host_key.decode()
    name = name.decode()
    value = value.decode()
    path = path.decode()
    # bool() so that callers receive a real flag rather than the raw bytes object
    is_encrypted = bool(not value and encrypted_value)

    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    return is_encrypted, compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
        comment=None, comment_url=None, rest={})


307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333
class ChromeCookieDecryptor:
    """
    Base class of the platform specific cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt a raw ``encrypted_value``; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')

    @property
    def cookie_counts(self):
        """Per-version counters of processed cookies; subclasses must override this."""
        raise NotImplementedError('Must be implemented by sub classes')
339

340

341
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor matching the current platform.

    @param browser_root          Browser directory (only used on Windows)
    @param browser_keyring_name  Keyring name (only used on Linux and Mac)
    @param logger                Logger used for reporting
    @param keyring               Keyring selection (only used on Linux)
    @raises NotImplementedError  On any other platform
    """
    if sys.platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    elif sys.platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    elif sys.platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    else:
        raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
350 351 352


class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for Chromium cookies on Linux (v10 and v11 prefixes)."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        # v10 cookies use a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # v11 cookies use a key derived from the keyring password, when one is found
        password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
        self._v11_key = None if password is None else self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt a value according to its 3 byte version prefix.

        Returns None for v11 values when no v11 key is available and for
        any prefix other than v10/v11.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        elif version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        else:
            self._cookie_counts['other'] += 1
            return None


class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for Chromium cookies on macOS (v10 prefix or plaintext)."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        # No key is available when the keyring password could not be obtained
        self._v10_key = None if password is None else self.derive_key(password)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt a v10 value; any other value is returned unchanged.

        Returns None for v10 values when no key is available.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value


class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for Chromium cookies on Windows (v10 AES-GCM or DPAPI)."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    @property
    def cookie_counts(self):
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Decrypt a value according to its 3 byte version prefix.

        v10 values are laid out as nonce + ciphertext + authentication tag.
        Returns None for v10 values when no key is available.
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): the DPAPI helper is defined outside this class; guard
            # against a None result so a failed decryption yields None (like the
            # v10 branch) instead of an AttributeError on ``.decode()``.
            return None if plaintext is None else plaintext.decode()
465 466 467 468 469 470


def _extract_safari_cookies(profile, logger):
    """Read and parse the safari ``Cookies.binarycookies`` file.

    @param profile  Must be None; any other value is reported as an error
    @param logger   Logger used for reporting
    @raises ValueError         If not running on darwin
    @raises FileNotFoundError  If neither known cookie location exists
    """
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')

    if not os.path.isfile(cookies_path):
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as f:
        cookies_data = f.read()

    jar = parse_safari_cookies(cookies_data, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar


class ParserError(Exception):
    """Raised when binary cookie data cannot be parsed."""


class DataParser:
    """Sequential reader over a bytes buffer.

    ``cursor`` is the offset of the next unread byte. Reads past the end of
    the buffer and negative reads/skips raise ParserError.
    """

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        """Read ``len(expected_value)`` bytes and raise ParserError unless they match."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian by default)."""
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian by default)."""
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        """Read a NUL terminated string, consume the terminator and decode it."""
        # Locate the terminator in one pass instead of reading byte by byte
        end = self._data.find(b'\x00', self.cursor)
        if end < 0:
            # The whole remainder was scanned without finding a terminator
            self.cursor = len(self._data)
            raise ParserError('reached end of input')
        raw = self._data[self.cursor:end]
        # Advance before decoding so the cursor is past the string even if decoding fails
        self.cursor = end + 1
        return raw.decode()

    def skip(self, num_bytes, description='unknown'):
        """Skip ``num_bytes`` bytes, logging the skipped content at debug level."""
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Skip forward to the absolute ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that is left in the buffer."""
        self.skip_to(len(self._data), description)


def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc)
    moment = mac_epoch + timedelta(seconds=timestamp)
    return int(moment.timestamp())


def _parse_safari_cookies_header(data, logger):
    """Parse the header of a safari cookies database.

    The header is the signature ``b'cook'``, a big endian page count and one
    big endian size per page.

    @returns  (list of page sizes, offset of the first byte after the header)
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor


def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a safari cookies database and add its cookies to ``jar``.

    A page is the signature ``b'\\x00\\x00\\x01\\x00'``, a cookie count, one
    record offset per cookie, and then the records themselves.
    """
    p = DataParser(data, logger)
    p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = p.read_uint()
    record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    p.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for i, record_offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
            p.skip_to(record_offset, 'space between records')
            record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
            # The record was parsed from its own slice; advance this parser past it
            p.read_bytes(record_length)
    p.skip_to_end('space in between pages')


def _parse_safari_cookies_record(data, jar, logger):
    """Parse one cookie record starting at ``data[0]`` and add it to ``jar``.

    A record whose strings are not valid UTF-8 is skipped with a warning.

    @returns  The record size read from the record header
    """
    p = DataParser(data, logger)
    record_size = p.read_uint()
    p.skip(4, 'unknown record field 1')
    flags = p.read_uint()
    is_secure = bool(flags & 0x0001)
    p.skip(4, 'unknown record field 2')
    domain_offset = p.read_uint()
    name_offset = p.read_uint()
    path_offset = p.read_uint()
    value_offset = p.read_uint()
    p.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(p.read_double())
    # Read to advance the cursor; the value itself is not used
    _creation_date = _mac_absolute_time_to_posix(p.read_double())  # noqa: F841

    try:
        p.skip_to(domain_offset)
        domain = p.read_cstring()

        p.skip_to(name_offset)
        name = p.read_cstring()

        p.skip_to(path_offset)
        path = p.read_cstring()

        p.skip_to(value_offset)
        value = p.read_cstring()
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    p.skip_to(record_size, 'space at the end of the record')

    cookie = compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={})
    jar.set_cookie(cookie)
    return record_size


def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the content of a safari cookies database into a cookie jar.

    A new YoutubeDLCookieJar is created when no jar is given.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar


635 636 637 638 639 640 641 642 643 644 645 646
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished when selecting a keyring.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # Values are the ones auto() assigns: consecutive integers starting at 1
    OTHER = 1
    CINNAMON = 2
    GNOME = 3
    KDE = 4
    PANTHEON = 5
    UNITY = 6
    XFCE = 7
647 648


649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698
class _LinuxKeyring(Enum):
    """
    Keyring backends that can be selected on Linux.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    # Values are the ones auto() assigns: consecutive integers starting at 1
    KWALLET = 1
    GNOMEKEYRING = 2
    BASICTEXT = 3


# Names of the _LinuxKeyring members (a keys view over the enum's __members__ mapping)
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()


def _get_linux_desktop_environment(env):
    """
    Detect the desktop environment from environment variables.

    The checks run in order and fall through when one is inconclusive:
    XDG_CURRENT_DESKTOP (first colon separated entry), then DESKTOP_SESSION,
    then GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION.

    @param env  Mapping of environment variables (e.g. os.environ)
    @returns    A _LinuxDesktopEnvironment member, OTHER if nothing matched

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)
    if xdg_current_desktop is not None:
        xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()

        if xdg_current_desktop == 'Unity':
            if desktop_session is not None and 'gnome-fallback' in desktop_session:
                return _LinuxDesktopEnvironment.GNOME
            else:
                return _LinuxDesktopEnvironment.UNITY
        elif xdg_current_desktop == 'GNOME':
            return _LinuxDesktopEnvironment.GNOME
        elif xdg_current_desktop == 'X-Cinnamon':
            return _LinuxDesktopEnvironment.CINNAMON
        elif xdg_current_desktop == 'KDE':
            return _LinuxDesktopEnvironment.KDE
        elif xdg_current_desktop == 'Pantheon':
            return _LinuxDesktopEnvironment.PANTHEON
        elif xdg_current_desktop == 'XFCE':
            return _LinuxDesktopEnvironment.XFCE

    # Not an elif: an unrecognised XDG_CURRENT_DESKTOP must not prevent the
    # older variables from being consulted
    if desktop_session is not None:
        if desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif 'kde' in desktop_session:
            return _LinuxDesktopEnvironment.KDE
        elif 'xfce' in desktop_session:
            return _LinuxDesktopEnvironment.XFCE

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    return _LinuxDesktopEnvironment.OTHER
700 701 702 703 704 705 706 707


def _choose_linux_keyring(logger):
    """
    Select the keyring backend for the detected desktop environment.

    KDE selects KWALLET, an unrecognised environment selects BASICTEXT and
    every other environment selects GNOMEKEYRING.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
    SelectBackend
    """
    desktop_environment = _get_linux_desktop_environment(os.environ)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE:
        linux_keyring = _LinuxKeyring.KWALLET
    elif desktop_environment == _LinuxDesktopEnvironment.OTHER:
        linux_keyring = _LinuxKeyring.BASICTEXT
    else:
        linux_keyring = _LinuxKeyring.GNOMEKEYRING
    return linux_keyring


def _get_kwallet_network_wallet(logger):
    """ The name of the wallet used to store network passwords.

    Asks kwalletd5 over the session bus (via `dbus-send`) and falls back to
    'kdewallet' whenever the call fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    try:
        proc = Popen([
            'dbus-send', '--session', '--print-reply=literal',
            '--dest=org.kde.kwalletd5',
            '/modules/kwalletd5',
            'org.kde.KWallet.networkWallet'
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        # stderr is sent to DEVNULL, so only stdout carries information
        stdout, _ = proc.communicate_or_kill()
        if proc.returncode != 0:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.decode().strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        # best effort: any failure (missing dbus-send, decode error, ...) yields the default
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet


def _get_kwallet_password(browser_keyring_name, logger):
    logger.debug('using kwallet-query to obtain password from kwallet')

    if shutil.which('kwallet-query') is None:
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be'
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(logger)

    try:
        proc = Popen([
            'kwallet-query',
P
pukkandan 已提交
763 764
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
765 766 767 768 769
            network_wallet
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        stdout, stderr = proc.communicate_or_kill()
        if proc.returncode != 0:
770 771
            logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
                         'the kwallet-query man page for details')
772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this may be a bug as the intended behaviour is to generate a random password and store
                # it, but that doesn't matter here.
                return b''
            else:
                logger.debug('password found')
                if stdout[-1:] == b'\n':
                    stdout = stdout[:-1]
                return stdout
P
pukkandan 已提交
789 790
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
791 792 793 794
        return b''


def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Look up the '<browser> Safe Storage' secret in the default secretstorage collection.

    Returns the secret of the first item whose label matches, or b'' when
    secretstorage is unavailable or no item matches.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        collection = secretstorage.get_default_collection(con)
        for entry in collection.get_all_items():
            if entry.get_label() == wanted_label:
                return entry.get_secret()

        # reached only when no item carried the wanted label
        logger.error('failed to read from keyring')
        return b''


def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Obtain the browser's keyring password from the requested or auto-detected backend.

    `keyring` is the name of a _LinuxKeyring member; when falsy, the backend is
    chosen by _choose_linux_keyring. Returns None for the basic text backend.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
        return _get_kwallet_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'


def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the '<browser> Safe Storage' password by running `security find-generic-password`.

    Returns the command's stdout as bytes with one trailing newline removed,
    or None if running the command raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        proc = Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        password, _ = proc.communicate_or_kill()
        if password[-1:] == b'\n':
            password = password[:-1]
        return password
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
849 850 851


def _get_windows_v10_key(browser_root, logger):
    """
    Extract and decrypt the `os_crypt.encrypted_key` entry of the browser's "Local State" file.

    Returns the result of _decrypt_windows_dpapi on the key (with its b'DPAPI'
    prefix removed), or None if the file or key is missing or the prefix is absent.
    """
    local_state_path = _find_most_recently_used_file(browser_root, 'Local State', logger)
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    dpapi_prefix = b'DPAPI'
    protected_key = compat_b64decode(encoded_key)
    if not protected_key.startswith(dpapi_prefix):
        logger.error('invalid key')
        return None
    return _decrypt_windows_dpapi(protected_key[len(dpapi_prefix):], logger)


def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive `key_length` bytes from `password` and `salt` using PBKDF2 with HMAC-SHA1."""
    derived_key = pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key


def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    """
    Decrypt `ciphertext` with AES-CBC, strip the PKCS#7 padding and decode the result as text.

    Returns the decoded string, or None (after a one-time warning) if the
    decrypted bytes cannot be decoded.
    """
    decrypted = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    unpadded = unpad_pkcs7(decrypted)
    try:
        return unpadded.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None


def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """
    Decrypt and authenticate `ciphertext` with AES-GCM, then decode the result as text.

    Returns the decoded string, or None (after a one-time warning) if the
    verification raises ValueError or the plaintext cannot be decoded.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        decoded = decrypted.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
    return decoded


def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32.

    Returns the decrypted bytes, or None (after a one-time warning) if the
    call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob)  # pDataOut
    )
    if not succeeded:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output out of the returned buffer before releasing that buffer
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext


def _config_home():
    return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))


def _open_database_copy(database_path, tmpdir):
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, database_copy_path)
    conn = sqlite3.connect(database_copy_path)
    return conn.cursor()


def _get_column_names(cursor, table_name):
P
pukkandan 已提交
944
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
P
pukkandan 已提交
945
    return [row[1].decode() for row in table_info]
946 947


948
def _find_most_recently_used_file(root, filename, logger):
    """
    Walk `root` and return the path of the file named `filename` with the
    newest modification time, or None if no such file exists.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched = 0
    matches = []
    with _create_progress_bar(logger) as progress_bar:
        for directory, _dirs, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry == filename:
                    matches.append(os.path.join(directory, entry))

    if not matches:
        return None
    return max(matches, key=lambda candidate: os.lstat(candidate).st_mtime)


def _merge_cookie_jars(jars):
    """
    Combine the cookies of all `jars` into a new YoutubeDLCookieJar.

    The merged jar takes the filename of the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        source_filename = source_jar.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged


def _is_path(value):
    return os.path.sep in value


975
def _parse_browser_specification(browser_name, profile=None, keyring=None):
    """
    Validate a browser specification and normalise its profile.

    Raises ValueError for a browser not in SUPPORTED_BROWSERS or a keyring that
    is neither None nor in SUPPORTED_KEYRINGS. A profile that looks like a path
    has `~` expanded. Returns the tuple (browser_name, profile, keyring).
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')

    accepted_keyrings = (None, *SUPPORTED_KEYRINGS)
    if keyring not in accepted_keyrings:
        raise ValueError(f'unsupported keyring: "{keyring}"')

    resolved_profile = profile
    if profile is not None and _is_path(profile):
        resolved_profile = os.path.expanduser(profile)
    return browser_name, resolved_profile, keyring