Added --console-title to display download progress in console window title.

[youtube-dl.git] / youtube-dl
diff --git a/youtube-dl b/youtube-dl

index 3d20a9d..d6aecea 100755 (executable)
--- a/youtube-dl
+++ b/youtube-dl
@@ -3,8 +3,11 @@
  # Author: Ricardo Garcia Gonzalez
  # Author: Danny Colligan
  # Author: Benjamin Johnson
+# Author: Vasyl' Vavrychuk
  # License: Public domain code
  import cookielib
+import ctypes
+import datetime
  import htmlentitydefs
  import httplib
  import locale
@@ -28,29 +31,15 @@ except ImportError:
         from cgi import parse_qs
  
  std_headers = {
-       'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.11) Gecko/20101019 Firefox/3.6.11',
+       'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12',
         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+       'Accept-Encoding': 'gzip, deflate',
         'Accept-Language': 'en-us,en;q=0.5',
  }
  
  simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  
-month_name_to_number = {
-       'January':      '01',
-       'February':     '02',
-       'March':        '03',
-       'April':        '04',
-       'May':          '05',
-       'June':         '06',
-       'July':         '07',
-       'August':       '08',
-       'September':    '09',
-       'October':      '10',
-       'November':     '11',
-       'December':     '12',
-}
-
  def preferredencoding():
         """Get preferred encoding.
  
@@ -124,7 +113,6 @@ def sanitize_open(filename, open_mode):
                 stream = open(filename, open_mode)
                 return (stream, filename)
  
-
  class DownloadError(Exception):
         """Download Error exception.
         
@@ -221,6 +209,7 @@ class FileDownloader(object):
         playliststart:    Playlist item to start at.
         playlistend:      Playlist item to end at.
         logtostderr:      Log messages to stderr instead of stdout.
+       consoletitle:     Display progress in console window's titlebar.
         """
  
         params = None
@@ -250,6 +239,13 @@ class FileDownloader(object):
                                 os.mkdir(dir)
         
         @staticmethod
+       def temp_name(filename):
+               """Map filename to the name used while downloading; u'-' and existing non-regular paths map to themselves."""
+               if filename != u'-' and (os.path.isfile(filename) or not os.path.exists(filename)):
+                       return filename + u'.part'
+               return filename
+       
+       @staticmethod
         def format_bytes(bytes):
                 if bytes is None:
                         return 'N/A'
@@ -338,6 +334,17 @@ class FileDownloader(object):
                 """Print message to stderr."""
                 print >>sys.stderr, message.encode(preferredencoding())
         
+       def to_cons_title(self, message):
+               """Show message as the console/terminal window title (no-op unless 'consoletitle' is set)."""
+               if self.params.get('consoletitle', False):
+                       on_windows_console = os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow()
+                       if on_windows_console:
+                               # Windows: set the title through the wide-character Win32 call.
+                               ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+                       elif 'TERM' in os.environ:
+                               # Elsewhere: write the title-setting escape sequence to stderr.
+                               sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
+
         def fixed_template(self):
                 """Checks if the output template is fixed."""
                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
@@ -367,6 +374,14 @@ class FileDownloader(object):
                 speed = float(byte_counter) / elapsed
                 if speed > rate_limit:
                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
+       
+       def try_rename(self, old_filename, new_filename):
+               """Rename old_filename to new_filename (no-op when equal); failures are reported via trouble() with the OS reason."""
+               try:
+                       if old_filename != new_filename:
+                               os.rename(old_filename, new_filename)
+               except (IOError, OSError), err:
+                       self.trouble(u'ERROR: unable to rename file: %s' % str(err))
  
         def report_destination(self, filename):
                 """Report destination filename."""
@@ -378,6 +393,8 @@ class FileDownloader(object):
                         return
                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+               self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
+                               (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
  
         def report_resuming_byte(self, resume_len):
                 """Report attempt to resume at given byte."""
@@ -498,6 +515,7 @@ class FileDownloader(object):
         
         def _download_with_rtmpdump(self, filename, url, player_url):
                 self.report_destination(filename)
+               tmpfilename = self.temp_name(filename)
  
                 # Check for rtmpdump first
                 try:
@@ -509,36 +527,43 @@ class FileDownloader(object):
                 # Download using rtmpdump. rtmpdump returns exit code 2 when
                 # the connection was interrumpted and resuming appears to be
                 # possible. This is part of rtmpdump's normal usage, AFAIK.
-               basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename]
+               basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
                 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
                 while retval == 2 or retval == 1:
-                       prevsize = os.path.getsize(filename)
+                       prevsize = os.path.getsize(tmpfilename)
                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
                         time.sleep(5.0) # This seems to be needed
                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
-                       cursize = os.path.getsize(filename)
+                       cursize = os.path.getsize(tmpfilename)
                         if prevsize == cursize and retval == 1:
                                 break
                 if retval == 0:
-                       self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
+                       self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
+                       self.try_rename(tmpfilename, filename)
                         return True
                 else:
                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
                         return False
  
         def _do_download(self, filename, url, player_url):
+               # Check file already present
+               if self.params.get('continuedl', False) and os.path.isfile(filename):
+                       self.report_file_already_downloaded(filename)
+                       return True
+
                 # Attempt to download using rtmpdump
                 if url.startswith('rtmp'):
                         return self._download_with_rtmpdump(filename, url, player_url)
  
+               tmpfilename = self.temp_name(filename)
                 stream = None
                 open_mode = 'wb'
                 basic_request = urllib2.Request(url, None, std_headers)
                 request = urllib2.Request(url, None, std_headers)
  
                 # Establish possible resume length
-               if os.path.isfile(filename):
-                       resume_len = os.path.getsize(filename)
+               if os.path.isfile(tmpfilename):
+                       resume_len = os.path.getsize(tmpfilename)
                 else:
                         resume_len = 0
  
@@ -580,6 +605,7 @@ class FileDownloader(object):
                                                         # completely downloaded if the file size differs less than 100 bytes from
                                                         # the one in the hard drive.
                                                         self.report_file_already_downloaded(filename)
+                                                       self.try_rename(tmpfilename, filename)
                                                         return True
                                                 else:
                                                         # The length does not match, we start the download over
@@ -596,8 +622,10 @@ class FileDownloader(object):
                         return False
  
                 data_len = data.info().get('Content-length', None)
+               if data_len is not None:
+                       data_len = long(data_len) + resume_len
                 data_len_str = self.format_bytes(data_len)
-               byte_counter = 0
+               byte_counter = 0 + resume_len
                 block_size = 1024
                 start = time.time()
                 while True:
@@ -605,15 +633,14 @@ class FileDownloader(object):
                         before = time.time()
                         data_block = data.read(block_size)
                         after = time.time()
-                       data_block_len = len(data_block)
-                       if data_block_len == 0:
+                       if len(data_block) == 0:
                                 break
-                       byte_counter += data_block_len
+                       byte_counter += len(data_block)
  
                         # Open file just in time
                         if stream is None:
                                 try:
-                                       (stream, filename) = sanitize_open(filename, open_mode)
+                                       (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                                         self.report_destination(filename)
                                 except (OSError, IOError), err:
                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
@@ -623,20 +650,22 @@ class FileDownloader(object):
                         except (IOError, OSError), err:
                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
                                 return False
-                       block_size = self.best_block_size(after - before, data_block_len)
+                       block_size = self.best_block_size(after - before, len(data_block))
  
                         # Progress message
                         percent_str = self.calc_percent(byte_counter, data_len)
-                       eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
-                       speed_str = self.calc_speed(start, time.time(), byte_counter)
+                       eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
+                       speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
  
                         # Apply rate limit
-                       self.slow_down(start, byte_counter)
+                       self.slow_down(start, byte_counter - resume_len)
  
+               stream.close()
                 self.report_finish()
-               if data_len is not None and str(byte_counter) != data_len:
+               if data_len is not None and byte_counter != data_len:
                         raise ContentTooShortError(byte_counter, long(data_len))
+               self.try_rename(tmpfilename, filename)
                 return True
  
  class InfoExtractor(object):
@@ -913,18 +942,13 @@ class YoutubeIE(InfoExtractor):
                 upload_date = u'NA'
                 mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
                 if mobj is not None:
-                       try:
-                               if ',' in mobj.group(1):
-                                       # Month Day, Year
-                                       m, d, y = mobj.group(1).replace(',', '').split()
-                               else:
-                                       # Day Month Year, we'll suppose
-                                       d, m, y = mobj.group(1).split()
-                               m = month_name_to_number[m]
-                               d = '%02d' % (long(d))
-                               upload_date = '%s%s%s' % (y, m, d)
-                       except:
-                               upload_date = u'NA'
+                       upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
+                       format_expressions = ['%d %B %Y', '%B %d %Y']
+                       for expression in format_expressions:
+                               try:
+                                       upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
+                               except:
+                                       pass
  
                 # description
                 video_description = 'No description available.'
@@ -937,8 +961,7 @@ class YoutubeIE(InfoExtractor):
                 video_token = urllib.unquote_plus(video_info['token'][0])
  
                 # Decide which formats to download
-               requested_format = self._downloader.params.get('format', None)
-               get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)
+               req_format = self._downloader.params.get('format', None)
  
                 if 'fmt_url_map' in video_info:
                         url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
@@ -951,12 +974,16 @@ class YoutubeIE(InfoExtractor):
                         if len(existing_formats) == 0:
                                 self._downloader.trouble(u'ERROR: no known formats available for video')
                                 return
-                       if requested_format is None:
-                               video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
-                       elif requested_format == '-1':
-                               video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
+                       if req_format is None:
+                               video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
+                       elif req_format == '-1':
+                               video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
                         else:
-                               video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format
+                               # Specific format
+                               if req_format not in url_map:
+                                       self._downloader.trouble(u'ERROR: requested format not available')
+                                       return
+                               video_url_list = [(req_format, url_map[req_format])] # Specific format
  
                 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
                         self.report_rtmp_download()
@@ -990,7 +1017,7 @@ class YoutubeIE(InfoExtractor):
                                         'player_url':   player_url,
                                 })
                         except UnavailableVideoError, err:
-                               self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
+                               self._downloader.trouble(u'\nERROR: unable to download video')
  
  
  class MetacafeIE(InfoExtractor):
@@ -1135,7 +1162,7 @@ class MetacafeIE(InfoExtractor):
                                 'player_url':   None,
                         })
                 except UnavailableVideoError:
-                       self._downloader.trouble(u'ERROR: unable to download video')
+                       self._downloader.trouble(u'\nERROR: unable to download video')
  
  
  class DailymotionIE(InfoExtractor):
@@ -1224,7 +1251,7 @@ class DailymotionIE(InfoExtractor):
                                 'player_url':   None,
                         })
                 except UnavailableVideoError:
-                       self._downloader.trouble(u'ERROR: unable to download video')
+                       self._downloader.trouble(u'\nERROR: unable to download video')
  
  class GoogleIE(InfoExtractor):
         """Information extractor for video.google.com."""
@@ -1334,7 +1361,7 @@ class GoogleIE(InfoExtractor):
                                 'player_url':   None,
                         })
                 except UnavailableVideoError:
-                       self._downloader.trouble(u'ERROR: unable to download video')
+                       self._downloader.trouble(u'\nERROR: unable to download video')
  
  
  class PhotobucketIE(InfoExtractor):
@@ -1416,7 +1443,7 @@ class PhotobucketIE(InfoExtractor):
                                 'player_url':   None,
                         })
                 except UnavailableVideoError:
-                       self._downloader.trouble(u'ERROR: unable to download video')
+                       self._downloader.trouble(u'\nERROR: unable to download video')
  
  
  class YahooIE(InfoExtractor):
@@ -1574,7 +1601,7 @@ class YahooIE(InfoExtractor):
                                 'player_url':   None,
                         })
                 except UnavailableVideoError:
-                       self._downloader.trouble(u'ERROR: unable to download video')
+                       self._downloader.trouble(u'\nERROR: unable to download video')
  
  
  class GenericIE(InfoExtractor):
@@ -1617,6 +1644,7 @@ class GenericIE(InfoExtractor):
                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
                         return
  
+               self.report_extraction(video_id)
                 # Start with something easy: JW Player in SWFObject
                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
                 if mobj is None:
@@ -1674,7 +1702,7 @@ class GenericIE(InfoExtractor):
                                 'player_url':   None,
                         })
                 except UnavailableVideoError, err:
-                       self._downloader.trouble(u'ERROR: unable to download video')
+                       self._downloader.trouble(u'\nERROR: unable to download video')
  
  
  class YoutubeSearchIE(InfoExtractor):
@@ -2073,6 +2101,85 @@ class YoutubeUserIE(InfoExtractor):
                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                 return
  
+class DepositFilesIE(InfoExtractor):
+       """Information extractor for depositfiles.com file pages (free-download flow)."""
+
+       _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
+
+       def __init__(self, downloader=None):
+               InfoExtractor.__init__(self, downloader)
+
+       @staticmethod
+       def suitable(url):
+               return (re.match(DepositFilesIE._VALID_URL, url) is not None)
+
+       def report_download_webpage(self, file_id):
+               """Report webpage download."""
+               self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
+
+       def report_extraction(self, file_id):
+               """Report information extraction."""
+               self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
+
+       def _real_initialize(self):
+               return
+
+       def _real_extract(self, url):
+               # At this point we have a new file
+               self._downloader.increment_downloads()
+
+               file_id = url.split('/')[-1]
+               # Rebuild the URL in the English locale (the patterns below match English page text)
+               url = 'http://depositfiles.com/en/files/' + file_id
+
+               # Retrieve file webpage with 'Free download' button pressed
+               free_download_indication = { 'gateway_result' : '1' }
+               request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers)
+               try:
+                       self.report_download_webpage(file_id)
+                       webpage = urllib2.urlopen(request).read()
+               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                       self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
+                       return
+
+               # Search for the real file URL
+               mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
+               if (mobj is None) or (mobj.group(1) is None):
+                       # No download form found: look for the site's restriction notice to explain why.
+                       mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
+                       if (mobj is not None) and (mobj.group(1) is not None):
+                               restriction_message = re.sub(r'\s+', ' ', mobj.group(1)).strip()
+                               self._downloader.trouble(u'ERROR: %s' % restriction_message)
+                       else:
+                               self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
+                       return
+
+               file_url = mobj.group(1)
+               file_extension = os.path.splitext(file_url)[1][1:]
+
+               # Search for file title
+               mobj = re.search(r'<b title="(.*?)">', webpage)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: unable to extract title')
+                       return
+               file_title = mobj.group(1).decode('utf-8')
+
+               try:
+                       # Process file information
+                       self._downloader.process_info({
+                               'id':           file_id.decode('utf-8'),
+                               'url':          file_url.decode('utf-8'),
+                               'uploader':     u'NA',
+                               'upload_date':  u'NA',
+                               'title':        file_title,
+                               'stitle':       file_title,
+                               'ext':          file_extension.decode('utf-8'),
+                               'format':       u'NA',
+                               'player_url':   None,
+                       })
+               except UnavailableVideoError, err:
+                       self._downloader.trouble(u'ERROR: unable to download file')
+
  class PostProcessor(object):
         """Post Processor class.
  
@@ -2145,7 +2252,7 @@ if __name__ == '__main__':
                 # Parse command line
                 parser = optparse.OptionParser(
                         usage='Usage: %prog [options] url...',
-                       version='2010.10.24',
+                       version='2010.12.09',
                         conflict_handler='resolve',
                 )
  
@@ -2165,6 +2272,8 @@ if __name__ == '__main__':
                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
                 parser.add_option('--playlist-end',
                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
+               parser.add_option('--dump-user-agent',
+                               action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False)
  
                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
                 authentication.add_option('-u', '--username',
@@ -2178,14 +2287,10 @@ if __name__ == '__main__':
                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
                 video_format.add_option('-f', '--format',
                                 action='store', dest='format', metavar='FORMAT', help='video format code')
-               video_format.add_option('-m', '--mobile-version',
-                               action='store_const', dest='format', help='alias for -f 17', const='17')
                 video_format.add_option('--all-formats',
                                 action='store_const', dest='format', help='download all available video formats', const='-1')
                 video_format.add_option('--max-quality',
                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
-               video_format.add_option('-b', '--best-quality',
-                               action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
                 parser.add_option_group(video_format)
  
                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
@@ -2203,6 +2308,8 @@ if __name__ == '__main__':
                                 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
                 verbosity.add_option('--no-progress',
                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
+               verbosity.add_option('--console-title',
+                               action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False)
                 parser.add_option_group(verbosity)
  
                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
@@ -2237,6 +2344,11 @@ if __name__ == '__main__':
                         except (IOError, OSError), err:
                                 sys.exit(u'ERROR: unable to open cookie file')
  
+               # Dump user agent
+               if opts.dump_user_agent:
+                       print std_headers['User-Agent']
+                       sys.exit(0)
+
                 # General configuration
                 cookie_processor = urllib2.HTTPCookieProcessor(jar)
                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
@@ -2259,8 +2371,6 @@ if __name__ == '__main__':
                 all_urls = batchurls + args
  
                 # Conflicting, missing and erroneous options
-               if opts.bestquality:
-                       print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
                         parser.error(u'using .netrc conflicts with giving username/password')
                 if opts.password is not None and opts.username is None:
@@ -2306,6 +2416,7 @@ if __name__ == '__main__':
                 photobucket_ie = PhotobucketIE()
                 yahoo_ie = YahooIE()
                 yahoo_search_ie = YahooSearchIE(yahoo_ie)
+               deposit_files_ie = DepositFilesIE()
                 generic_ie = GenericIE()
  
                 # File downloader
@@ -2340,6 +2451,7 @@ if __name__ == '__main__':
                         'playliststart': opts.playliststart,
                         'playlistend': opts.playlistend,
                         'logtostderr': opts.outtmpl == '-',
+                       'consoletitle': opts.consoletitle,
                         })
                 fd.add_info_extractor(youtube_search_ie)
                 fd.add_info_extractor(youtube_pl_ie)
@@ -2352,6 +2464,7 @@ if __name__ == '__main__':
                 fd.add_info_extractor(photobucket_ie)
                 fd.add_info_extractor(yahoo_ie)
                 fd.add_info_extractor(yahoo_search_ie)
+               fd.add_info_extractor(deposit_files_ie)
  
                 # This must come last since it's the
                 # fallback if none of the others work