Update README with new options

[youtube-dl.git] / youtube-dl
diff --git a/youtube-dl b/youtube-dl

index dbcf1c9..0e7f5d6 100755 (executable)
--- a/youtube-dl
+++ b/youtube-dl
@@ -15,7 +15,7 @@ __author__  = (
         )
  
  __license__ = 'Public Domain'
-__version__ = '2011.09.15'
+__version__ = '2011.09.18c'
  
  UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
  
@@ -625,11 +625,12 @@ class FileDownloader(object):
                         return
                 filetime = timeconvert(timestr)
                 if filetime is None:
-                       return
+                       return filetime
                 try:
                         os.utime(filename, (time.time(), filetime))
                 except:
                         pass
+               return filetime
  
         def report_writedescription(self, descfn):
                 """ Report that the description file is being written """
@@ -697,20 +698,23 @@ class FileDownloader(object):
         def process_info(self, info_dict):
                 """Process a single dictionary returned by an InfoExtractor."""
                 filename = self.prepare_filename(info_dict)
+               
+               # Forced printings
+               if self.params.get('forcetitle', False):
+                       print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
+               if self.params.get('forceurl', False):
+                       print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
+               if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
+                       print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
+               if self.params.get('forcedescription', False) and 'description' in info_dict:
+                       print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
+               if self.params.get('forcefilename', False) and filename is not None:
+                       print filename.encode(preferredencoding(), 'xmlcharrefreplace')
+               if self.params.get('forceformat', False):
+                       print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
+
                 # Do nothing else if in simulate mode
                 if self.params.get('simulate', False):
-                       # Forced printings
-                       if self.params.get('forcetitle', False):
-                               print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
-                       if self.params.get('forceurl', False):
-                               print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
-                       if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
-                               print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
-                       if self.params.get('forcedescription', False) and 'description' in info_dict:
-                               print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
-                       if self.params.get('forcefilename', False) and filename is not None:
-                               print filename.encode(preferredencoding(), 'xmlcharrefreplace')
-
                         return
  
                 if filename is None:
@@ -769,23 +773,24 @@ class FileDownloader(object):
                                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
                                 return
  
-               try:
-                       success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
-               except (OSError, IOError), err:
-                       raise UnavailableVideoError
-               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self.trouble(u'ERROR: unable to download video data: %s' % str(err))
-                       return
-               except (ContentTooShortError, ), err:
-                       self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
-                       return
-
-               if success:
+               if not self.params.get('skip_download', False):
                         try:
-                               self.post_process(filename, info_dict)
-                       except (PostProcessingError), err:
-                               self.trouble(u'ERROR: postprocessing: %s' % str(err))
+                               success = self._do_download(filename, info_dict)
+                       except (OSError, IOError), err:
+                               raise UnavailableVideoError
+                       except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                               self.trouble(u'ERROR: unable to download video data: %s' % str(err))
                                 return
+                       except (ContentTooShortError, ), err:
+                               self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+                               return
+       
+                       if success:
+                               try:
+                                       self.post_process(filename, info_dict)
+                               except (PostProcessingError), err:
+                                       self.trouble(u'ERROR: postprocessing: %s' % str(err))
+                                       return
  
         def download(self, url_list):
                 """Download a given list of URLs."""
@@ -859,7 +864,10 @@ class FileDownloader(object):
                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
                         return False
  
-       def _do_download(self, filename, url, player_url):
+       def _do_download(self, filename, info_dict):
+               url = info_dict['url']
+               player_url = info_dict.get('player_url', None)
+
                 # Check file already present
                 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
                         self.report_file_already_downloaded(filename)
@@ -871,7 +879,6 @@ class FileDownloader(object):
  
                 tmpfilename = self.temp_name(filename)
                 stream = None
-               open_mode = 'wb'
  
                 # Do not include the Accept-Encoding header
                 headers = {'Youtubedl-no-compression': 'True'}
@@ -884,11 +891,14 @@ class FileDownloader(object):
                 else:
                         resume_len = 0
  
-               # Request parameters in case of being able to resume
-               if self.params.get('continuedl', False) and resume_len != 0:
-                       self.report_resuming_byte(resume_len)
-                       request.add_header('Range', 'bytes=%d-' % resume_len)
-                       open_mode = 'ab'
+               open_mode = 'wb'
+               if resume_len != 0:
+                       if self.params.get('continuedl', False):
+                               self.report_resuming_byte(resume_len)
+                               request.add_header('Range','bytes=%d-' % resume_len)
+                               open_mode = 'ab'
+                       else:
+                               resume_len = 0
  
                 count = 0
                 retries = self.params.get('retries', 0)
@@ -972,10 +982,13 @@ class FileDownloader(object):
                         block_size = self.best_block_size(after - before, len(data_block))
  
                         # Progress message
-                       percent_str = self.calc_percent(byte_counter, data_len)
-                       eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
-                       self.report_progress(percent_str, data_len_str, speed_str, eta_str)
+                       if data_len is None:
+                               self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
+                       else:
+                               percent_str = self.calc_percent(byte_counter, data_len)
+                               eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
+                               self.report_progress(percent_str, data_len_str, speed_str, eta_str)
  
                         # Apply rate limit
                         self.slow_down(start, byte_counter - resume_len)
@@ -991,7 +1004,7 @@ class FileDownloader(object):
  
                 # Update file modification time
                 if self.params.get('updatetime', True):
-                       self.try_utime(filename, data.info().get('last-modified', None))
+                       info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
  
                 return True
  
@@ -1069,13 +1082,13 @@ class InfoExtractor(object):
  class YoutubeIE(InfoExtractor):
         """Information extractor for youtube.com."""
  
-       _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
+       _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
         _NETRC_MACHINE = 'youtube'
         # Listed in order of quality
-       _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
+       _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
         _video_extensions = {
                 '13': '3gp',
                 '17': 'mp4',
@@ -1084,6 +1097,7 @@ class YoutubeIE(InfoExtractor):
                 '37': 'mp4',
                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
                 '43': 'webm',
+               '44': 'webm',
                 '45': 'webm',
         }
         IE_NAME = u'youtube'
@@ -1313,16 +1327,24 @@ class YoutubeIE(InfoExtractor):
                         if len(existing_formats) == 0:
                                 self._downloader.trouble(u'ERROR: no known formats available for video')
                                 return
-                       if req_format is None:
+                       if req_format is None or req_format == 'best':
                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
-                       elif req_format == '-1':
+                       elif req_format == 'worst':
+                               video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
+                       elif req_format in ('-1', 'all'):
                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
                         else:
-                               # Specific format
-                               if req_format not in url_map:
+                               # Specific formats. We pick the first in a slash-delimited sequence.
+                               # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+                               req_formats = req_format.split('/')
+                               video_url_list = None
+                               for rf in req_formats:
+                                       if rf in url_map:
+                                               video_url_list = [(rf, url_map[rf])]
+                                               break
+                               if video_url_list is None:
                                         self._downloader.trouble(u'ERROR: requested format not available')
                                         return
-                               video_url_list = [(req_format, url_map[req_format])] # Specific format
                 else:
                         self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
                         return
@@ -2827,6 +2849,8 @@ class FacebookIE(InfoExtractor):
                                 return
                         if req_format is None:
                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
+                       elif req_format == 'worst':
+                               video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
                         elif req_format == '-1':
                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
                         else:
@@ -3152,7 +3176,7 @@ class ComedyCentralIE(InfoExtractor):
  class EscapistIE(InfoExtractor):
         """Information extractor for The Escapist """
  
-       _VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?].*$'
+       _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
         IE_NAME = u'escapist'
  
         def report_extraction(self, showName):
@@ -3282,11 +3306,13 @@ class PostProcessor(object):
  
  class FFmpegExtractAudioPP(PostProcessor):
  
-       def __init__(self, downloader=None, preferredcodec=None):
+       def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
                 PostProcessor.__init__(self, downloader)
                 if preferredcodec is None:
                         preferredcodec = 'best'
                 self._preferredcodec = preferredcodec
+               self._preferredquality = preferredquality
+               self._keepvideo = keepvideo
  
         @staticmethod
         def get_audio_codec(path):
@@ -3325,24 +3351,32 @@ class FFmpegExtractAudioPP(PostProcessor):
  
                 more_opts = []
                 if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
-                       if filecodec == 'aac' or filecodec == 'mp3':
+                       if filecodec == 'aac' or filecodec == 'mp3' or filecodec == 'vorbis':
                                 # Lossless if possible
                                 acodec = 'copy'
                                 extension = filecodec
                                 if filecodec == 'aac':
                                         more_opts = ['-f', 'adts']
+                               if filecodec == 'vorbis':
+                                       extension = 'ogg'
                         else:
                                 # MP3 otherwise.
                                 acodec = 'libmp3lame'
                                 extension = 'mp3'
-                               more_opts = ['-ab', '128k']
+                               more_opts = []
+                               if self._preferredquality is not None:
+                                       more_opts += ['-ab', self._preferredquality]
                 else:
                         # We convert the audio (lossy)
-                       acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
+                       acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'vorbis': 'libvorbis'}[self._preferredcodec]
                         extension = self._preferredcodec
-                       more_opts = ['-ab', '128k']
+                       more_opts = []
+                       if self._preferredquality is not None:
+                               more_opts += ['-ab', self._preferredquality]
                         if self._preferredcodec == 'aac':
                                 more_opts += ['-f', 'adts']
+                       if self._preferredcodec == 'vorbis':
+                               extension = 'ogg'
  
                 (prefix, ext) = os.path.splitext(path)
                 new_path = prefix + '.' + extension
@@ -3353,11 +3387,19 @@ class FFmpegExtractAudioPP(PostProcessor):
                         self._downloader.to_stderr(u'WARNING: error running ffmpeg')
                         return None
  
-               try:
-                       os.remove(path)
-               except (IOError, OSError):
-                       self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
-                       return None
+               # Try to update the date time for extracted audio file.
+               if information.get('filetime') is not None:
+                       try:
+                               os.utime(new_path, (time.time(), information['filetime']))
+                       except:
+                               self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
+
+               if not self._keepvideo:
+                       try:
+                               os.remove(path)
+                       except (IOError, OSError):
+                               self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
+                               return None
  
                 information['filepath'] = new_path
                 return information
@@ -3375,6 +3417,11 @@ def updateSelf(downloader, filename):
                 try:
                         urlh = urllib.urlopen(UPDATE_URL)
                         newcontent = urlh.read()
+                       
+                       vmatch = re.search("__version__ = '([^']+)'", newcontent)
+                       if vmatch is not None and vmatch.group(1) == __version__:
+                               downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')')
+                               return
                 finally:
                         urlh.close()
         except (IOError, OSError), err:
@@ -3389,7 +3436,7 @@ def updateSelf(downloader, filename):
         except (IOError, OSError), err:
                 sys.exit('ERROR: unable to overwrite current version')
  
-       downloader.to_screen('Updated youtube-dl. Restart to use the new version.')
+       downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
  
  def parseOpts():
         # Deferred imports
@@ -3487,7 +3534,7 @@ def parseOpts():
         video_format.add_option('-f', '--format',
                         action='store', dest='format', metavar='FORMAT', help='video format code')
         video_format.add_option('--all-formats',
-                       action='store_const', dest='format', help='download all available video formats', const='-1')
+                       action='store_const', dest='format', help='download all available video formats', const='all')
         video_format.add_option('--max-quality',
                         action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
  
@@ -3495,7 +3542,9 @@ def parseOpts():
         verbosity.add_option('-q', '--quiet',
                         action='store_true', dest='quiet', help='activates quiet mode', default=False)
         verbosity.add_option('-s', '--simulate',
-                       action='store_true', dest='simulate', help='do not download video', default=False)
+                       action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
+       verbosity.add_option('--skip-download',
+                       action='store_true', dest='skip_download', help='do not download the video', default=False)
         verbosity.add_option('-g', '--get-url',
                         action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
         verbosity.add_option('-e', '--get-title',
@@ -3509,6 +3558,9 @@ def parseOpts():
         verbosity.add_option('--get-filename',
                         action='store_true', dest='getfilename',
                         help='simulate, quiet but print output filename', default=False)
+       verbosity.add_option('--get-format',
+                       action='store_true', dest='getformat',
+                       help='simulate, quiet but print output format', default=False)
         verbosity.add_option('--no-progress',
                         action='store_true', dest='noprogress', help='do not print progress bar', default=False)
         verbosity.add_option('--console-title',
@@ -3531,8 +3583,11 @@ def parseOpts():
                         action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
         filesystem.add_option('-c', '--continue',
                         action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
+       filesystem.add_option('--no-continue',
+                       action='store_false', dest='continue_dl',
+                       help='do not resume partially downloaded files (restart from beginning)')
         filesystem.add_option('--cookies',
-                       dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
+                       dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
         filesystem.add_option('--no-part',
                         action='store_true', dest='nopart', help='do not use .part files', default=False)
         filesystem.add_option('--no-mtime',
@@ -3549,7 +3604,11 @@ def parseOpts():
         postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
                         help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
         postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
-                       help='"best", "aac" or "mp3"; best by default')
+                       help='"best", "aac", "vorbis" or "mp3"; best by default')
+       postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
+                       help='ffmpeg audio bitrate specification, 128k by default')
+       postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
+                       help='keeps the video file on disk after the post-processing; the video is erased by default')
  
  
         parser.add_option_group(general)
@@ -3572,12 +3631,12 @@ def gen_extractors():
         google_ie = GoogleIE()
         yahoo_ie = YahooIE()
         return [
-               youtube_ie,
-               MetacafeIE(youtube_ie),
-               DailymotionIE(),
                 YoutubePlaylistIE(youtube_ie),
                 YoutubeUserIE(youtube_ie),
                 YoutubeSearchIE(youtube_ie),
+               youtube_ie,
+               MetacafeIE(youtube_ie),
+               DailymotionIE(),
                 google_ie,
                 GoogleSearchIE(google_ie),
                 PhotobucketIE(),
@@ -3679,7 +3738,7 @@ def main():
         except (TypeError, ValueError), err:
                 parser.error(u'invalid playlist end number specified')
         if opts.extractaudio:
-               if opts.audioformat not in ['best', 'aac', 'mp3']:
+               if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis']:
                         parser.error(u'invalid audio format specified')
  
         # File downloader
@@ -3687,13 +3746,15 @@ def main():
                 'usenetrc': opts.usenetrc,
                 'username': opts.username,
                 'password': opts.password,
-               'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
+               'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
                 'forceurl': opts.geturl,
                 'forcetitle': opts.gettitle,
                 'forcethumbnail': opts.getthumbnail,
                 'forcedescription': opts.getdescription,
                 'forcefilename': opts.getfilename,
-               'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
+               'forceformat': opts.getformat,
+               'simulate': opts.simulate,
+               'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
                 'format': opts.format,
                 'format_limit': opts.format_limit,
                 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
@@ -3728,7 +3789,7 @@ def main():
  
         # PostProcessors
         if opts.extractaudio:
-               fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))
+               fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
  
         # Update version
         if opts.update_self: