--list-extractors (Closes #161)

author Philipp Hagemeister <phihag@phihag.de>

Thu, 15 Sep 2011 08:43:49 +0000 (10:43 +0200)

committer Philipp Hagemeister <phihag@phihag.de>

Thu, 15 Sep 2011 09:03:29 +0000 (11:03 +0200)
author Philipp Hagemeister <phihag@phihag.de>
Thu, 15 Sep 2011 08:43:49 +0000 (10:43 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 15 Sep 2011 09:03:29 +0000 (11:03 +0200)
diff --git a/youtube-dl b/youtube-dl

index cd8e57b..dbcf1c9 100755 (executable)
--- a/youtube-dl
+++ b/youtube-dl
@@ -1086,6 +1086,7 @@ class YoutubeIE(InfoExtractor):
                 '43': 'webm',
                 '45': 'webm',
         }
+       IE_NAME = u'youtube'
  
         def report_lang(self):
                 """Report attempt to set language."""
@@ -1359,6 +1360,7 @@ class MetacafeIE(InfoExtractor):
         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
         _youtube_ie = None
+       IE_NAME = u'metacafe'
  
         def __init__(self, youtube_ie, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -1497,6 +1499,7 @@ class DailymotionIE(InfoExtractor):
         """Information Extractor for Dailymotion"""
  
         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
+       IE_NAME = u'dailymotion'
  
         def __init__(self, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -1587,6 +1590,7 @@ class GoogleIE(InfoExtractor):
         """Information extractor for video.google.com."""
  
         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
+       IE_NAME = u'video.google'
  
         def __init__(self, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -1693,6 +1697,7 @@ class PhotobucketIE(InfoExtractor):
         """Information extractor for photobucket.com."""
  
         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
+       IE_NAME = u'photobucket'
  
         def __init__(self, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -1774,6 +1779,7 @@ class YahooIE(InfoExtractor):
         # _VPAGE_URL matches only the extractable '/watch/' URLs
         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
+       IE_NAME = u'video.yahoo'
  
         def __init__(self, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -1926,6 +1932,7 @@ class VimeoIE(InfoExtractor):
  
         # _VALID_URL matches Vimeo URLs
         _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
+       IE_NAME = u'vimeo'
  
         def __init__(self, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -2036,7 +2043,8 @@ class VimeoIE(InfoExtractor):
  class GenericIE(InfoExtractor):
         """Generic last-resort information extractor."""
  
-       _VALID_URL = '.*'
+       _VALID_URL = r'.*'
+       IE_NAME = u'generic'
  
         def __init__(self, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -2140,6 +2148,7 @@ class YoutubeSearchIE(InfoExtractor):
         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
         _youtube_ie = None
         _max_youtube_results = 1000
+       IE_NAME = u'youtube:search'
  
         def __init__(self, youtube_ie, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -2228,6 +2237,7 @@ class GoogleSearchIE(InfoExtractor):
         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
         _google_ie = None
         _max_google_results = 1000
+       IE_NAME = u'video.google:search'
  
         def __init__(self, google_ie, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -2316,6 +2326,7 @@ class YahooSearchIE(InfoExtractor):
         _MORE_PAGES_INDICATOR = r'\s*Next'
         _yahoo_ie = None
         _max_yahoo_results = 1000
+       IE_NAME = u'video.yahoo:search'
  
         def __init__(self, yahoo_ie, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -2404,6 +2415,7 @@ class YoutubePlaylistIE(InfoExtractor):
         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
         _youtube_ie = None
+       IE_NAME = u'youtube:playlist'
  
         def __init__(self, youtube_ie, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -2478,6 +2490,7 @@ class YoutubeUserIE(InfoExtractor):
         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
         _youtube_ie = None
+       IE_NAME = u'youtube:user'
  
         def __init__(self, youtube_ie, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -2560,6 +2573,7 @@ class DepositFilesIE(InfoExtractor):
         """Information extractor for depositfiles.com"""
  
         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
+       IE_NAME = u'DepositFiles'
  
         def __init__(self, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -2643,6 +2657,7 @@ class FacebookIE(InfoExtractor):
                 'highqual': 'mp4',
                 'lowqual': 'mp4',
         }
+       IE_NAME = u'facebook'
  
         def __init__(self, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -2852,6 +2867,7 @@ class BlipTVIE(InfoExtractor):
  
         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
         _URL_EXT = r'^.*\.([a-z0-9]+)$'
+       IE_NAME = u'blip.tv'
  
         def report_extraction(self, file_id):
                 """Report information extraction."""
@@ -2923,6 +2939,7 @@ class MyVideoIE(InfoExtractor):
         """Information Extractor for myvideo.de."""
  
         _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
+       IE_NAME = u'myvideo'
  
         def __init__(self, downloader=None):
                 InfoExtractor.__init__(self, downloader)
@@ -2994,7 +3011,8 @@ class MyVideoIE(InfoExtractor):
  class ComedyCentralIE(InfoExtractor):
         """Information extractor for The Daily Show and Colbert Report """
  
-       _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
+       _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
+       IE_NAME = u'comedycentral'
  
         def report_extraction(self, episode_id):
                 self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
@@ -3135,6 +3153,7 @@ class EscapistIE(InfoExtractor):
         """Information extractor for The Escapist """
  
         _VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?].*$'
+       IE_NAME = u'escapist'
  
         def report_extraction(self, showName):
                 self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
@@ -3446,6 +3465,9 @@ def parseOpts():
         general.add_option('--dump-user-agent',
                         action='store_true', dest='dump_user_agent',
                         help='display the current browser identification', default=False)
+       general.add_option('--list-extractors',
+                       action='store_true', dest='list_extractors',
+                       help='List all supported extractors and the URLs they would handle', default=False)
  
         selection.add_option('--playlist-start',
                         dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
@@ -3542,6 +3564,36 @@ def parseOpts():
  
         return parser, opts, args
  
+def gen_extractors():
+       """ Return a list of an instance of every supported extractor.
+       The order does matter; the first extractor matched is the one handling the URL.
+       """
+       youtube_ie = YoutubeIE() # also passed to MetacafeIE and the YouTube playlist/user/search extractors below
+       google_ie = GoogleIE() # also passed to GoogleSearchIE below
+       yahoo_ie = YahooIE() # also passed to YahooSearchIE below
+       return [
+               youtube_ie,
+               MetacafeIE(youtube_ie),
+               DailymotionIE(),
+               YoutubePlaylistIE(youtube_ie),
+               YoutubeUserIE(youtube_ie),
+               YoutubeSearchIE(youtube_ie),
+               google_ie,
+               GoogleSearchIE(google_ie),
+               PhotobucketIE(),
+               yahoo_ie,
+               YahooSearchIE(yahoo_ie),
+               DepositFilesIE(),
+               FacebookIE(),
+               BlipTVIE(),
+               VimeoIE(),
+               MyVideoIE(),
+               ComedyCentralIE(),
+               EscapistIE(),
+
+               GenericIE() # last-resort extractor: its _VALID_URL is r'.*', so it has to stay at the end
+       ]
+
  def main():
         parser, opts, args = parseOpts()
  
@@ -3561,12 +3613,6 @@ def main():
                 print std_headers['User-Agent']
                 sys.exit(0)
  
-       # General configuration
-       cookie_processor = urllib2.HTTPCookieProcessor(jar)
-       opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
-       urllib2.install_opener(opener)
-       socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
-
         # Batch file verification
         batchurls = []
         if opts.batchfile is not None:
@@ -3582,6 +3628,23 @@ def main():
                         sys.exit(u'ERROR: batch file could not be read')
         all_urls = batchurls + args
  
+       # General configuration
+       cookie_processor = urllib2.HTTPCookieProcessor(jar)
+       opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
+       urllib2.install_opener(opener)
+       socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
+
+       extractors = gen_extractors()
+
+       if opts.list_extractors:
+               for ie in extractors:
+                       print(ie.IE_NAME)
+                       matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
+                       all_urls = filter(lambda url: url not in matchedUrls, all_urls)
+                       for mu in matchedUrls:
+                               print(u'  ' + mu)
+               sys.exit(0)
+
         # Conflicting, missing and erroneous options
         if opts.usenetrc and (opts.username is not None or opts.password is not None):
                 parser.error(u'using .netrc conflicts with giving username/password')
@@ -3619,33 +3682,6 @@ def main():
                 if opts.audioformat not in ['best', 'aac', 'mp3']:
                         parser.error(u'invalid audio format specified')
  
-       # Information extractors
-       youtube_ie = YoutubeIE()
-       google_ie = GoogleIE()
-       yahoo_ie = YahooIE()
-       extractors = [ # Order does matter
-               youtube_ie,
-               MetacafeIE(youtube_ie),
-               DailymotionIE(),
-               YoutubePlaylistIE(youtube_ie),
-               YoutubeUserIE(youtube_ie),
-               YoutubeSearchIE(youtube_ie),
-               google_ie,
-               GoogleSearchIE(google_ie),
-               PhotobucketIE(),
-               yahoo_ie,
-               YahooSearchIE(yahoo_ie),
-               DepositFilesIE(),
-               FacebookIE(),
-               BlipTVIE(),
-               VimeoIE(),
-               MyVideoIE(),
-               ComedyCentralIE(),
-               EscapistIE(),
-
-               GenericIE()
-       ]
-
         # File downloader
         fd = FileDownloader({
                 'usenetrc': opts.usenetrc,
author	Philipp Hagemeister <phihag@phihag.de>
	Thu, 15 Sep 2011 08:43:49 +0000 (10:43 +0200)
committer	Philipp Hagemeister <phihag@phihag.de>
	Thu, 15 Sep 2011 09:03:29 +0000 (11:03 +0200)