X-Git-Url: https://git.jankratochvil.net/?a=blobdiff_plain;f=youtube-dl;h=6a603349115d45de2cb28fa07324a7ac4e6e3cb4;hb=561504fffaec8a141a5bdd20ca0a418795d38835;hp=ebfb6ae06531def478204f25108b3dc42170dfbe;hpb=a95567af99d182784314320ceca858bd960559c6;p=youtube-dl.git diff --git a/youtube-dl b/youtube-dl index ebfb6ae..6a60334 100755 --- a/youtube-dl +++ b/youtube-dl @@ -18,7 +18,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.11.23' +__version__ = '2011.12.08' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' @@ -282,6 +282,14 @@ def _simplify_title(title): expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE) return expr.sub(u'_', title).strip(u'_') +def _orderedSet(iterable): + """ Remove all duplicates from the input iterable """ + res = [] + for el in iterable: + if el not in res: + res.append(el) + return res + class DownloadError(Exception): """Download Error exception. @@ -309,6 +317,10 @@ class PostProcessingError(Exception): """ pass +class MaxDownloadsReached(Exception): + """ --max-downloads limit has been reached. """ + pass + class UnavailableVideoError(Exception): """Unavailable Format exception. @@ -699,8 +711,31 @@ class FileDownloader(object): self.trouble(u'ERROR: invalid system charset or erroneous output template') return None + def _match_entry(self, info_dict): + """ Returns None iff the file should be downloaded """ + + title = info_dict['title'] + matchtitle = self.params.get('matchtitle', False) + if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): + return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' + rejecttitle = self.params.get('rejecttitle', False) + if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): + return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' + return None + def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" + + reason = self._match_entry(info_dict) + if reason is not None: + self.to_screen(u'[download] ' + reason) + return + + max_downloads = self.params.get('max_downloads') + if max_downloads is not None: + if self._num_downloads > int(max_downloads): + raise MaxDownloadsReached() + filename = self.prepare_filename(info_dict) # Forced printings @@ -724,16 +759,6 @@ class FileDownloader(object): if filename is None: return - matchtitle=self.params.get('matchtitle',False) - rejecttitle=self.params.get('rejecttitle',False) - title=info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') - if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): - self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle)) - return - if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): - self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle)) - return - if self.params.get('nooverwrites', False) and os.path.exists(filename): self.to_stderr(u'WARNING: file exists and will be skipped') return @@ -1096,6 +1121,7 @@ class YoutubeIE(InfoExtractor): _NETRC_MACHINE = 'youtube' # Listed in order of quality _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] + _available_formats_prefer_free = ['38', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', @@ -1345,10 +1371,11 @@ class YoutubeIE(InfoExtractor): url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data) format_limit = self._downloader.params.get('format_limit', None) - if format_limit is not None and format_limit in self._available_formats: - format_list = self._available_formats[self._available_formats.index(format_limit):] + available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats + if format_limit is not None and format_limit in available_formats: + format_list = available_formats[available_formats.index(format_limit):] else: - format_list = self._available_formats + format_list = available_formats existing_formats = [x for x in format_list if x in url_map] if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') @@ -1604,7 +1631,7 @@ class DailymotionIE(InfoExtractor): video_url = mediaURL - mobj = re.search(r'(?im)Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?', webpage) + mobj = re.search(r'(?im)\s*(.+)\s*-\s*Video\s+Dailymotion', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract title') return @@ -3730,6 +3757,124 @@ class MixcloudIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'ERROR: unable to download file') +class StanfordOpenClassroomIE(InfoExtractor): + """Information extractor for Stanford's Open ClassRoom""" + + _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P[^&]+)(&video=(?P