X-Git-Url: http://git.jankratochvil.net/?p=youtube-dl.git;a=blobdiff_plain;f=youtube-dl;h=9ed1621865ae33fc85518c70044ceba4e23d9d3b;hp=67f8a38fcdaee99af1f29969af0108ef269dd555;hb=468c99257ca43401413366e7dc4544d5cca28a36;hpb=aab771fbdfeb7bb57b4a7d8e22e13bf34d31bac3 diff --git a/youtube-dl b/youtube-dl index 67f8a38..9ed1621 100755 --- a/youtube-dl +++ b/youtube-dl @@ -17,7 +17,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.10.19' +__version__ = '2011.11.21' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' @@ -79,8 +79,6 @@ std_headers = { 'Accept-Language': 'en-us,en;q=0.5', } -simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') - try: import json except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): @@ -279,6 +277,9 @@ def timeconvert(timestr): timestamp = email.utils.mktime_tz(timetuple) return timestamp +def _simplify_title(title): + expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE) + return expr.sub(u'_', title).strip(u'_') class DownloadError(Exception): """Download Error exception. 
@@ -1291,8 +1292,7 @@ class YoutubeIE(InfoExtractor): video_title = sanitize_title(video_title) # simplified title - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - simple_title = simple_title.strip(ur'_') + simple_title = _simplify_title(video_title) # thumbnail image if 'thumbnail_url' not in video_info: @@ -1562,9 +1562,6 @@ class DailymotionIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) @@ -1653,9 +1650,6 @@ class GoogleIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url): # Extract id from URL mobj = re.match(self._VALID_URL, url) @@ -1699,7 +1693,7 @@ class GoogleIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = _simplify_title(video_title) # Extract video description mobj = re.search(r'([^<]*)', webpage) @@ -1760,9 +1754,6 @@ class PhotobucketIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url): # Extract id from URL mobj = re.match(self._VALID_URL, url) @@ -1801,7 +1792,7 @@ class PhotobucketIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = _simplify_title(video_title) video_uploader = mobj.group(2).decode('utf-8') @@ -1842,9 +1833,6 @@ class 
YahooIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@ -1898,7 +1886,7 @@ class YahooIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract video title') return video_title = mobj.group(1).decode('utf-8') - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = _simplify_title(video_title) mobj = re.search(r'

(.*)

', webpage) if mobj is None: @@ -1995,9 +1983,6 @@ class VimeoIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@ -2029,7 +2014,7 @@ class VimeoIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract video title') return video_title = mobj.group(1).decode('utf-8') - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = _simplify_title(video_title) # Extract uploader mobj = re.search(r'http://vimeo.com/(.*?)', webpage) @@ -2120,9 +2105,6 @@ class GenericIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self, url): # At this point we have a new video self._downloader.increment_downloads() @@ -2176,7 +2158,7 @@ class GenericIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = _simplify_title(video_title) # video uploader is domain name mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) @@ -2649,9 +2631,6 @@ class DepositFilesIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id) - def _real_initialize(self): - return - def _real_extract(self, url): # At this point we have a new file self._downloader.increment_downloads() @@ -2849,9 +2828,7 @@ class FacebookIE(InfoExtractor): video_title = video_title.decode('utf-8') video_title = sanitize_title(video_title) - # simplified title - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - simple_title = simple_title.strip(ur'_') + 
simple_title = _simplify_title(video_title) # thumbnail image if 'thumbnail' not in video_info: @@ -2942,11 +2919,6 @@ class BlipTVIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title)) - def _simplify_title(self, title): - res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) - res = res.strip(ur'_') - return res - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -2966,13 +2938,14 @@ class BlipTVIE(InfoExtractor): if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download basename = url.split('/')[-1] title,ext = os.path.splitext(basename) + title = title.decode('UTF-8') ext = ext.replace('.', '') self.report_direct_download(title) info = { 'id': title, 'url': url, 'title': title, - 'stitle': self._simplify_title(title), + 'stitle': _simplify_title(title), 'ext': ext, 'urlhandle': urlh } @@ -3006,7 +2979,7 @@ class BlipTVIE(InfoExtractor): 'uploader': data['display_name'], 'upload_date': upload_date, 'title': data['title'], - 'stitle': self._simplify_title(data['title']), + 'stitle': _simplify_title(data['title']), 'ext': ext, 'format': data['media']['mimeType'], 'thumbnail': data['thumbnailUrl'], @@ -3042,9 +3015,6 @@ class MyVideoIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id) - def _real_initialize(self): - return - def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -3052,10 +3022,6 @@ class MyVideoIE(InfoExtractor): return video_id = mobj.group(1) - simple_title = mobj.group(2).decode('utf-8') - # should actually not be necessary - simple_title = sanitize_title(simple_title) - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title) # Get video webpage request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id) @@ -3082,6 +3048,8 @@ class 
MyVideoIE(InfoExtractor): video_title = mobj.group(1) video_title = sanitize_title(video_title) + simple_title = _simplify_title(video_title) + try: self._downloader.process_info({ 'id': video_id, @@ -3115,11 +3083,6 @@ class ComedyCentralIE(InfoExtractor): def report_player_url(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) - def _simplify_title(self, title): - res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) - res = res.strip(ur'_') - return res - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -3128,9 +3091,9 @@ class ComedyCentralIE(InfoExtractor): if mobj.group('shortname'): if mobj.group('shortname') in ('tds', 'thedailyshow'): - url = 'http://www.thedailyshow.com/full-episodes/' + url = u'http://www.thedailyshow.com/full-episodes/' else: - url = 'http://www.colbertnation.com/full-episodes/' + url = u'http://www.colbertnation.com/full-episodes/' mobj = re.match(self._VALID_URL, url) assert mobj is not None @@ -3216,7 +3179,7 @@ class ComedyCentralIE(InfoExtractor): self._downloader.increment_downloads() - effTitle = showId + '-' + epTitle + effTitle = showId + u'-' + epTitle info = { 'id': shortMediaId, 'url': video_url, @@ -3250,11 +3213,6 @@ class EscapistIE(InfoExtractor): def report_config_download(self, showName): self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName) - def _simplify_title(self, title): - res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) - res = res.strip(ur'_') - return res - def _real_extract(self, url): htmlParser = HTMLParser.HTMLParser() @@ -3307,7 +3265,7 @@ class EscapistIE(InfoExtractor): 'uploader': showName, 'upload_date': None, 'title': showName, - 'stitle': self._simplify_title(showName), + 'stitle': _simplify_title(showName), 'ext': 'flv', 'format': 'flv', 'thumbnail': imgUrl, @@ -3335,11 +3293,6 @@ class CollegeHumorIE(InfoExtractor): """Report information 
extraction.""" self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) - def _simplify_title(self, title): - res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) - res = res.strip(ur'_') - return res - def _real_extract(self, url): htmlParser = HTMLParser.HTMLParser() @@ -3381,7 +3334,7 @@ class CollegeHumorIE(InfoExtractor): videoNode = mdoc.findall('./video')[0] info['description'] = videoNode.findall('./description')[0].text info['title'] = videoNode.findall('./caption')[0].text - info['stitle'] = self._simplify_title(info['title']) + info['stitle'] = _simplify_title(info['title']) info['url'] = videoNode.findall('./file')[0].text info['thumbnail'] = videoNode.findall('./thumbnail')[0].text info['ext'] = info['url'].rpartition('.')[2] @@ -3412,11 +3365,6 @@ class XVideosIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) - def _simplify_title(self, title): - res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) - res = res.strip(ur'_') - return res - def _real_extract(self, url): htmlParser = HTMLParser.HTMLParser() @@ -3470,7 +3418,7 @@ class XVideosIE(InfoExtractor): 'uploader': None, 'upload_date': None, 'title': video_title, - 'stitle': self._simplify_title(video_title), + 'stitle': _simplify_title(video_title), 'ext': 'flv', 'format': 'flv', 'thumbnail': video_thumbnail, @@ -3507,9 +3455,6 @@ class SoundcloudIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) - def _real_initialize(self): - return - def _real_extract(self, url): htmlParser = HTMLParser.HTMLParser() @@ -3536,7 +3481,7 @@ class SoundcloudIE(InfoExtractor): self.report_extraction('%s/%s' % (uploader, slug_title)) # extract uid and stream token that soundcloud hands out for access - mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', 
webpage) + mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', webpage) if mobj: video_id = mobj.group(1) stream_token = mobj.group(2) @@ -3599,11 +3544,6 @@ class InfoQIE(InfoExtractor): """Report information extraction.""" self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) - def _simplify_title(self, title): - res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) - res = res.strip(ur'_') - return res - def _real_extract(self, url): htmlParser = HTMLParser.HTMLParser() @@ -3639,7 +3579,6 @@ class InfoQIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') - # Extract description video_description = u'No description available.' mobj = re.search(r'', webpage) @@ -3656,7 +3595,7 @@ class InfoQIE(InfoExtractor): 'uploader': None, 'upload_date': None, 'title': video_title, - 'stitle': self._simplify_title(video_title), + 'stitle': _simplify_title(video_title), 'ext': extension, 'format': extension, # Extension is always(?) mp4, but seems to be flv 'thumbnail': None, @@ -4073,7 +4012,7 @@ def gen_extractors(): GenericIE() ] -def main(): +def _real_main(): parser, opts, args = parseOpts() # Open appropriate CookieJar @@ -4233,10 +4172,9 @@ def main(): sys.exit(retcode) - -if __name__ == '__main__': +def main(): try: - main() + _real_main() except DownloadError: sys.exit(1) except SameFileError: @@ -4244,4 +4182,7 @@ if __name__ == '__main__': except KeyboardInterrupt: sys.exit(u'\nERROR: Interrupted by user') +if __name__ == '__main__': + main() + # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: