...
 
Commits (5)
    https://gitcode.net/XianxinMao/yt-dlp/-/commit/bdd0b75e3f41ff35440eda6d395008beef19ef2f [ie/BiliBiliBangumi] Fix extractors (#7337) 2023-07-08T22:26:03+00:00 GD-Slime 82302542+GD-Slime@users.noreply.github.com - Overhaul BiliBiliBangumi extractor for the site's new API - Add BiliBiliBangumiSeason extractor - Refactor BiliBiliBangumiMedia extractor Closes #6701, Closes #7400 Authored by: GD-Slime https://gitcode.net/XianxinMao/yt-dlp/-/commit/325191d0c9bf3fe257b8a7c2eb95080f44f6ddfc [ie/vrt] Update token signing key (#7519) 2023-07-10T13:15:47+00:00 Zprokkel 105783800+Zprokkel@users.noreply.github.com Authored by: Zprokkel https://gitcode.net/XianxinMao/yt-dlp/-/commit/2af4eeb77246b8183aae75a0a8d19f18c08115b2 [utils] `clean_podcast_url`: Handle more trackers (#7556) 2023-07-11T06:30:38+05:30 Mahmoud Abdel-Fattah accounts@abdel-fattah.net Authored by: mabdelfattah, bashonly Closes #7544 https://gitcode.net/XianxinMao/yt-dlp/-/commit/2cfe221fbbe46faa3f46552c08d947a51f424903 [ie/streamanity] Remove (#7571) 2023-07-13T19:47:05+05:30 Aleri Kaisattera 73682764+alerikaisattera@users.noreply.github.com Service is dead Authored by: alerikaisattera https://gitcode.net/XianxinMao/yt-dlp/-/commit/8a4cd12c8f8e93292e3e95200b9d17a3af39624c [pp/EmbedThumbnail] Support `m4v` (#7583) 2023-07-14T02:09:21+05:30 Neurognostic donovan@tremura.email Authored by: Neurognostic
......@@ -1835,6 +1835,8 @@ def test_iri_to_uri(self):
def test_clean_podcast_url(self):
    """Check that clean_podcast_url maps each tracked URL to the expected direct URL."""
    # Each entry is (URL wrapped in tracker prefixes, expected cleaned URL).
    cases = (
        ('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3',
         'https://traffic.megaphone.fm/HSW7835899191.mp3'),
        ('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3',
         'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'),
        ('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661',
         'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'),
        ('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3',
         'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'),
    )
    for tracked_url, expected_url in cases:
        self.assertEqual(clean_podcast_url(tracked_url), expected_url)
def test_LazyList(self):
it = list(range(10))
......
......@@ -214,6 +214,7 @@
from .bilibili import (
BiliBiliIE,
BiliBiliBangumiIE,
BiliBiliBangumiSeasonIE,
BiliBiliBangumiMediaIE,
BiliBiliSearchIE,
BilibiliCategoryIE,
......@@ -1870,7 +1871,6 @@
StoryFireSeriesIE,
)
from .streamable import StreamableIE
from .streamanity import StreamanityIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streamff import StreamFFIE
......
......@@ -18,6 +18,7 @@
float_or_none,
format_field,
int_or_none,
join_nonempty,
make_archive_id,
merge_dicts,
mimetype2ext,
......@@ -135,6 +136,17 @@ def _get_all_children(self, reply):
for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
yield from children
def _get_episodes_from_season(self, ss_id, url):
    """Yield one url_result per episode listed in the main section of a season.

    ss_id is sent as the ``season_id`` query parameter and used as the
    download's video id; url is sent as the Referer header.
    """
    request_headers = {'Referer': url, **self.geo_verification_headers()}
    section_data = self._download_json(
        'https://api.bilibili.com/pgc/web/season/section', ss_id,
        note='Downloading season info', query={'season_id': ss_id},
        headers=request_headers)

    def _is_usable(_, episode):
        # Keep an entry only if url_or_none accepts its share_url and its id is truthy
        return url_or_none(episode['share_url']) and episode['id']

    episode_path = ('result', 'main_section', 'episodes', _is_usable)
    for episode in traverse_obj(section_data, episode_path):
        yield self.url_result(
            episode['share_url'], BiliBiliBangumiIE, f'ep{episode["id"]}')
class BiliBiliIE(BilibiliBaseIE):
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
......@@ -403,76 +415,93 @@ def _real_extract(self, url):
# Extractor for single bangumi episode pages (bilibili.com/bangumi/play/...).
# NOTE(review): this span reads like a flattened diff -- superseded and
# replacement lines appear side by side with no +/- markers and no
# indentation. The notes below flag where that is visible; confirm against
# the real file before relying on any single line.
class BiliBiliBangumiIE(BilibiliBaseIE):
# NOTE(review): _VALID_URL is assigned twice in a row; presumably the first
# pattern (accepting both ss and ep ids) is the stale one -- confirm.
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
# NOTE(review): in the first test case 'url' and most info_dict keys are
# listed twice (ss897 values, then ep267851 values); presumably the first of
# each pair is stale -- confirm.
_TESTS = [{
'url': 'https://www.bilibili.com/bangumi/play/ss897',
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
'info_dict': {
'id': 'ss897',
# NOTE(review): this expected id has no 'ep' prefix, while _real_extract
# returns video_id as 'id' and the ep\d+ pattern captures the prefix -- confirm.
'id': '267851',
'ext': 'mp4',
'series': '神的记事本',
'season': '神的记事本',
'season_id': 897,
'series': '鬼灭之刃',
'series_id': '4358',
'season': '鬼灭之刃',
'season_id': '26801',
'season_number': 1,
'episode': '你与旅行包',
'episode_number': 2,
'title': '神的记事本:第2话 你与旅行包',
'duration': 1428.487,
'timestamp': 1310809380,
'upload_date': '20110716',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'episode': '残酷',
'episode_id': '267851',
'episode_number': 1,
'title': '1 残酷',
'duration': 1425.256,
'timestamp': 1554566400,
'upload_date': '20190406',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
},
}, {
'url': 'https://www.bilibili.com/bangumi/play/ep508406',
'only_matching': True,
'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
}]
# Builds the info dict for one episode from the episode web page plus the
# playurl and season API responses.
def _real_extract(self, url):
video_id = self._match_id(url)
# Drop the first two characters of the matched id (the 'ep' prefix under the ep\d+ pattern)
episode_id = video_id[2:]
webpage = self._download_webpage(url, video_id)
# Marker strings in the page text are used to detect geo-blocked and
# premium-only episodes.
if '您所在的地区无法观看本片' in webpage:
raise GeoRestrictedError('This video is restricted')
# NOTE(review): two elif headers follow each other; the parenthesised one has
# no body of its own and is presumably the superseded condition -- confirm.
elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
or '正在观看预览,大会员免费看全片' in webpage):
elif '正在观看预览,大会员免费看全片' in webpage:
self.raise_login_required('This video is for premium members only')
# NOTE(review): play_info is read from the page here and then reassigned from
# the playurl API just below; presumably this page-scraping line is stale.
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
# The same headers are reused for both API requests and returned as http_headers
headers = {'Referer': url, **self.geo_verification_headers()}
play_info = self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
headers=headers)
# Response code -10403 is treated as "premium members only" (see the check after extract_formats)
premium_only = play_info.get('code') == -10403
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
formats = self.extract_formats(play_info)
# NOTE(review): two `if` headers in a row; the first (spanning two lines) is
# presumably the superseded condition -- confirm.
if (not formats and '成为大会员抢先看' in webpage
and play_info.get('durl') and not play_info.get('dash')):
if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
self.raise_login_required('This video is for premium members only')
# NOTE(review): initial_state is only referenced by lines that have a
# bangumi_info-based counterpart nearby; presumably it belongs to the older
# page-scraping path -- confirm.
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
bangumi_info = self._download_json(
'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
query={'ep_id': episode_id}, headers=headers)['result']
season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
# Find this episode in the season's episode list: idx is its 1-based
# position; falls back to (1, {}) when no entry's id matches episode_id.
episode_number, episode_info = next((
(idx, ep) for idx, ep in enumerate(traverse_obj(
bangumi_info, ('episodes', ..., {dict})), 1)
if str_or_none(ep.get('id')) == episode_id), (1, {}))
season_id = bangumi_info.get('season_id')
# 1-based position of this season among the listed seasons, or None when
# season_id is falsy or no entry matches.
# NOTE(review): two traverse_obj(...) lines appear inside the enumerate call,
# leaving the brackets unbalanced; presumably the initial_state one is stale.
season_number = season_id and next((
idx + 1 for idx, e in enumerate(
traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
traverse_obj(bangumi_info, ('seasons', ...)))
if e.get('season_id') == season_id
), None)
aid = episode_info.get('aid')
# NOTE(review): several keys below ('title', 'episode', 'episode_number',
# 'season_id', 'thumbnail', 'timestamp', 'subtitles', '__post_extractor',
# 'http_headers') occur twice, once sourced from initial_state and once from
# the API data; presumably the initial_state variants are stale -- confirm.
return {
'id': video_id,
'formats': formats,
'title': traverse_obj(initial_state, 'h1Title'),
'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
'season_id': season_id,
**traverse_obj(bangumi_info, {
'series': ('series', 'series_title', {str}),
'series_id': ('series', 'series_id', {str_or_none}),
'thumbnail': ('square_cover', {url_or_none}),
}),
'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
'episode': episode_info.get('long_title'),
'episode_id': episode_id,
# Use the episode's 'title' field when int_or_none yields a truthy number, else the list position
'episode_number': int_or_none(episode_info.get('title')) or episode_number,
'season_id': str_or_none(season_id),
'season_number': season_number,
'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
'timestamp': int_or_none(episode_info.get('pub_time')),
# timelength is divided by 1000 (scale=1000) to produce the duration
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'subtitles': self.extract_subtitles(
video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
'__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
'http_headers': {'Referer': url, **self.geo_verification_headers()},
'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
'__post_extractor': self.extract_comments(aid),
'http_headers': headers,
}
class BiliBiliBangumiMediaIE(InfoExtractor):
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
......@@ -485,16 +514,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor):
def _real_extract(self, url):
    """Return a playlist of the episodes in the season referenced by a media page."""
    media_id = self._match_id(url)
    page_html = self._download_webpage(url, media_id)
    # The season id is read from the JSON assigned to window.__INITIAL_STATE__
    page_state = self._search_json(
        r'window\.__INITIAL_STATE__\s*=', page_html, 'initial_state', media_id)
    season_id = page_state['mediaInfo']['season_id']
    episodes = self._get_episodes_from_season(season_id, url)
    return self.playlist_result(episodes, media_id)
# Playlist extractor for bangumi season pages (bilibili.com/bangumi/play/ss<digits>).
# NOTE(review): this span reads like a flattened diff; some lines below do not
# belong to this class as written -- see the notes and confirm against the
# real file.
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
'info_dict': {
'id': '26801'
},
'playlist_mincount': 26
}]
# NOTE(review): the two statements below use `webpage` and `media_id`, neither
# of which is defined anywhere in this class; presumably they are leftover
# lines from a media-page extractor -- confirm.
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
episode_list = self._download_json(
'https://api.bilibili.com/pgc/web/season/section', media_id,
query={'season_id': initial_state['mediaInfo']['season_id']},
note='Downloading season info')['result']['main_section']['episodes']
# Returns a playlist whose id is the numeric season id matched from the URL.
def _real_extract(self, url):
ss_id = self._match_id(url)
# NOTE(review): two return statements follow; the first depends on
# `episode_list` and `media_id` from the suspect lines above and would make
# the second unreachable -- presumably it is stale; confirm.
return self.playlist_result((
self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
for entry in episode_list), media_id)
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)
class BilibiliSpaceBaseIE(InfoExtractor):
......
from .common import InfoExtractor
class StreamanityIE(InfoExtractor):
    """Extractor for single video pages on streamanity.com."""

    _VALID_URL = r'https?://(?:www\.)?streamanity\.com/video/(?P<id>[A-Za-z0-9]+)'
    _TESTS = [
        {
            'url': 'https://streamanity.com/video/9DFPTnuYi8f2',
            'md5': '6ab171e8d4a02ad5dcbff6bea44cf5a1',
            'info_dict': {
                'id': '9DFPTnuYi8f2',
                'ext': 'mp4',
                'title': 'Bitcoin vs The Lighting Network',
                'thumbnail': r're:https://res\.cloudinary\.com/.+\.png',
                'description': '',
                'uploader': 'Tom Bombadil (Freddy78)',
            },
        },
        {
            'url': 'https://streamanity.com/video/JktOUjSlfzTD',
            'md5': '31f131e28abd3377c38be586a59532dc',
            'info_dict': {
                'id': 'JktOUjSlfzTD',
                'ext': 'mp4',
                'title': 'Share data when you see it',
                'thumbnail': r're:https://res\.cloudinary\.com/.+\.png',
                'description': 'Reposting as data should be public and stored on blockchain',
                'uploader': 'digitalcurrencydaily',
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the video's API record and build its info dict with HLS formats."""
        video_id = self._match_id(url)
        api_response = self._download_json(
            f'https://app.streamanity.com/api/video/{video_id}', video_id)
        metadata = api_response['data']['video']

        # The playlist URL is built from the record's play_id and token fields
        play_id, token = metadata['play_id'], metadata['token']
        playlist_url = f'https://stream.mux.com/{play_id}.m3u8?token={token}'
        hls_formats = self._extract_m3u8_formats(
            playlist_url, video_id, ext='mp4', m3u8_id='hls')

        # title is mandatory (indexed directly); the other fields are optional
        return {
            'id': video_id,
            'title': metadata['title'],
            'description': metadata.get('description'),
            'uploader': metadata.get('author_name'),
            'is_live': False,
            'thumbnail': metadata.get('thumb'),
            'formats': hls_formats,
        }
......@@ -44,9 +44,11 @@ class VRTBaseIE(GigyaBaseIE):
'version': '2.7.4-prod-2023-04-19T06:05:45'
}
}
# From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.fd1de01a40a1e3d842ea.js
# From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js
_JWT_KEY_ID = '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w='
_JWT_SIGNING_KEY = '2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae'
_JWT_SIGNING_KEY = 'b5f500d55cb44715107249ccd8a5c0136cfb2788dbb71b90a4f142423bacaf38' # -dev
# player-stag.vrt.be key: d23987504521ae6fbf2716caca6700a24bb1579477b43c84e146b279de5ca595
# player.vrt.be key: 2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae
def _extract_formats_and_subtitles(self, data, video_id):
if traverse_obj(data, 'drm'):
......
......@@ -114,7 +114,7 @@ def run(self, info):
self._report_run('ffmpeg', filename)
self.run_ffmpeg(filename, temp_filename, options)
elif info['ext'] in ['m4a', 'mp4', 'mov']:
elif info['ext'] in ['m4a', 'mp4', 'm4v', 'mov']:
prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', [])
# Method 1: Use mutagen
if not mutagen or prefer_atomicparsley:
......@@ -213,7 +213,7 @@ def run(self, info):
temp_filename = filename
else:
raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus/flac, m4a/mp4/mov')
raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus/flac, m4a/mp4/m4v/mov')
if success and temp_filename != filename:
os.replace(temp_filename, filename)
......
......@@ -5123,14 +5123,18 @@ def clean_podcast_url(url):
(?:
chtbl\.com/track|
media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
play\.podtrac\.com
)/[^/]+|
play\.podtrac\.com|
chrt\.fm/track|
mgln\.ai/e
)(?:/[^/.]+)?|
(?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
flex\.acast\.com|
pd(?:
cn\.co| # https://podcorn.com/analytics-prefix/
st\.fm # https://podsights.com/docs/
)/e
)/e|
[0-9]\.gum\.fm|
pscrb\.fm/rss/p
)/''', '', url)
return re.sub(r'^\w+://(\w+://)', r'\1', url)
......