From 305959db6d46ae43a0ebe241627c3ed947f43c3f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 17 Jul 2014 08:46:11 +0200 Subject: [PATCH] Youku: playlist support --- src/you_get/extractor/youku.py | 44 ++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/you_get/extractor/youku.py b/src/you_get/extractor/youku.py index 4f3c0b3..b746071 100644 --- a/src/you_get/extractor/youku.py +++ b/src/you_get/extractor/youku.py @@ -16,28 +16,46 @@ class Youku(VideoExtractor): {'id': '3gphd', 'container': '3gp', 'video_profile': '高清(3GP)'}, ] + def parse_m3u8(m3u8): + return re.findall(r'(http://[^?]+)\?ts_start=0', m3u8) + def get_vid_from_url(url): """Extracts video ID from URL. """ - patterns = [ - 'youku.com/v_show/id_([\w=]+)', - 'player.youku.com/player.php/sid/([\w=]+)/v.swf', - 'loader\.swf\?VideoIDS=([\w=]+)', - ] - matches = match1(url, *patterns) - if matches: - return matches[0] - else: - return None + return match1(url, r'youku\.com/v_show/id_([\w=]+)') or \ + match1(url, r'player\.youku\.com/player\.php/sid/([\w=]+)/v\.swf') or \ + match1(url, r'loader\.swf\?VideoIDS=([\w=]+)') - def parse_m3u8(m3u8): - return re.findall(r'(http://[^?]+)\?ts_start=0', m3u8) + def get_playlist_id_from_url(url): + """Extracts playlist ID from URL. + """ + return match1(url, r'youku\.com/playlist_show/id_([\w=]+)') + + def download_playlist_by_url(self, url, **kwargs): + self.url = url + + playlist_id = __class__.get_playlist_id_from_url(self.url) + if playlist_id is None: + log.wtf('[Failed] Unsupported URL pattern.') + + video_page = get_content('http://www.youku.com/playlist_show/id_%s' % playlist_id) + videos = set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page)) + self.title = re.search(r'