New option --get-filename to print output filename
[youtube-dl.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # Author: Vasyl' Vavrychuk
7 # Author: Witold Baryluk
8 # License: Public domain code
9 import cookielib
10 import ctypes
11 import datetime
12 import gzip
13 import htmlentitydefs
14 import httplib
15 import locale
16 import math
17 import netrc
18 import os
19 import os.path
20 import re
21 import socket
22 import string
23 import StringIO
24 import subprocess
25 import sys
26 import time
27 import urllib
28 import urllib2
29 import zlib
30
31 # parse_qs was moved from the cgi module to the urlparse module recently.
32 try:
33         from urlparse import parse_qs
34 except ImportError:
35         from cgi import parse_qs
36
# HTTP headers attached to every request (see YoutubeDLHandler.http_request);
# they mimic a desktop Firefox browser so sites serve their regular pages.
std_headers = {
        'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-us,en;q=0.5',
}
44
# Characters kept when building a "simple" (filesystem-safe) title: ASCII letters and digits.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
46
def preferredencoding():
        """Get preferred encoding.

        Returns the best encoding scheme for the system, based on
        locale.getpreferredencoding() and some further tweaks.
        """
        # The original routed this through a one-shot generator and .next(),
        # which added nothing; a plain try/except behaves identically.
        # Probe the locale's answer: if it is missing or names a codec that
        # cannot actually encode text, fall back to UTF-8.
        try:
                pref = locale.getpreferredencoding()
                u'TEST'.encode(pref)
        except:
                pref = 'UTF-8'
        return pref
62
63 def htmlentity_transform(matchobj):
64         """Transforms an HTML entity to a Unicode character.
65
66         This function receives a match object and is intended to be used with
67         the re.sub() function.
68         """
69         entity = matchobj.group(1)
70
71         # Known non-numeric HTML entity
72         if entity in htmlentitydefs.name2codepoint:
73                 return unichr(htmlentitydefs.name2codepoint[entity])
74
75         # Unicode character
76         mobj = re.match(ur'(?u)#(x?\d+)', entity)
77         if mobj is not None:
78                 numstr = mobj.group(1)
79                 if numstr.startswith(u'x'):
80                         base = 16
81                         numstr = u'0%s' % numstr
82                 else:
83                         base = 10
84                 return unichr(long(numstr, base))
85
86         # Unknown entity in name, return its literal representation
87         return (u'&%s;' % entity)
88
89 def sanitize_title(utitle):
90         """Sanitizes a video title so it could be used as part of a filename."""
91         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
92         return utitle.replace(unicode(os.sep), u'%')
93
94 def sanitize_open(filename, open_mode):
95         """Try to open the given filename, and slightly tweak it if this fails.
96
97         Attempts to open the given filename. If this fails, it tries to change
98         the filename slightly, step by step, until it's either able to open it
99         or it fails and raises a final exception, like the standard open()
100         function.
101
102         It returns the tuple (stream, definitive_file_name).
103         """
104         try:
105                 if filename == u'-':
106                         if sys.platform == 'win32':
107                                 import msvcrt
108                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
109                         return (sys.stdout, filename)
110                 stream = open(filename, open_mode)
111                 return (stream, filename)
112         except (IOError, OSError), err:
113                 # In case of error, try to remove win32 forbidden chars
114                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
115
116                 # An exception here should be caught in the caller
117                 stream = open(filename, open_mode)
118                 return (stream, filename)
119
class DownloadError(Exception):
        """Download Error exception.

        This exception may be thrown by FileDownloader objects if they are not
        configured to continue on errors. They will contain the appropriate
        error message.

        Raised by FileDownloader.trouble() when 'ignoreerrors' is not set.
        """
        pass
128
class SameFileError(Exception):
        """Same File exception.

        This exception will be thrown by FileDownloader objects if they detect
        multiple files would have to be downloaded to the same file on disk.

        Raised by FileDownloader.download() when several URLs are given but
        the output template contains no variable fields.
        """
        pass
136
class PostProcessingError(Exception):
        """Post Processing exception.

        This exception may be raised by PostProcessor's .run() method to
        indicate an error in the postprocessing task. It is caught in
        FileDownloader.process_info() and reported via trouble().
        """
        pass
144
class UnavailableVideoError(Exception):
        """Unavailable Format exception.

        This exception will be thrown when a video is requested
        in a format that is not available for that video.

        Raised by FileDownloader.process_info() when _do_download fails
        with an OS-level error.
        """
        pass
152
class ContentTooShortError(Exception):
        """Content Too Short exception.

        Raised when a downloaded file turns out smaller than the size the
        server announced, which usually means the connection was interrupted.
        """
        # Byte counts: what was actually received vs. what was announced.
        downloaded = None
        expected = None

        def __init__(self, downloaded, expected):
                self.downloaded = downloaded
                self.expected = expected
167
class YoutubeDLHandler(urllib2.HTTPHandler):
        """Handler for HTTP requests and responses.

        This class, when installed with an OpenerDirector, automatically adds
        the standard headers to every HTTP request and handles gzipped and
        deflated responses from web servers. If compression is to be avoided in
        a particular request, the original request in the program code only has
        to include the HTTP header "Youtubedl-No-Compression", which will be
        removed before making the real request.

        Part of this code was copied from:

          http://techknack.net/python-urllib2-handlers/

        Andrew Rowls, the author of that code, agreed to release it to the
        public domain.
        """

        @staticmethod
        def deflate(data):
                """Decompress a deflate payload, raw or zlib-wrapped."""
                try:
                        # Raw deflate stream (no zlib header), as some servers send.
                        return zlib.decompress(data, -zlib.MAX_WBITS)
                except zlib.error:
                        # Fall back to a standard zlib-wrapped stream.
                        return zlib.decompress(data)

        @staticmethod
        def addinfourl_wrapper(stream, headers, url, code):
                """Build an addinfourl carrying the HTTP status code, also on
                older Pythons whose constructor does not accept one."""
                if hasattr(urllib2.addinfourl, 'getcode'):
                        return urllib2.addinfourl(stream, headers, url, code)
                ret = urllib2.addinfourl(stream, headers, url)
                ret.code = code
                return ret

        def http_request(self, req):
                """Inject the standard headers into every outgoing request."""
                # Delete-then-add so the standard value replaces, not
                # duplicates, anything the caller set.
                for h in std_headers:
                        if h in req.headers:
                                del req.headers[h]
                        req.add_header(h, std_headers[h])
                # NOTE(review): keys are spelled the way urllib2 stores them
                # ('Youtubedl-no-compression') -- presumably matching how
                # callers add the marker header; confirm against callers.
                if 'Youtubedl-no-compression' in req.headers:
                        if 'Accept-encoding' in req.headers:
                                del req.headers['Accept-encoding']
                        del req.headers['Youtubedl-no-compression']
                return req

        def http_response(self, req, resp):
                """Transparently decompress gzip/deflate response bodies."""
                old_resp = resp
                # gzip
                if resp.headers.get('Content-encoding', '') == 'gzip':
                        gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
                        resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
                        resp.msg = old_resp.msg
                # deflate
                if resp.headers.get('Content-encoding', '') == 'deflate':
                        gz = StringIO.StringIO(self.deflate(resp.read()))
                        resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
                        resp.msg = old_resp.msg
                return resp
225
226 class FileDownloader(object):
227         """File Downloader class.
228
229         File downloader objects are the ones responsible of downloading the
230         actual video file and writing it to disk if the user has requested
231         it, among some other tasks. In most cases there should be one per
232         program. As, given a video URL, the downloader doesn't know how to
233         extract all the needed information, task that InfoExtractors do, it
234         has to pass the URL to one of them.
235
236         For this, file downloader objects have a method that allows
237         InfoExtractors to be registered in a given order. When it is passed
238         a URL, the file downloader handles it to the first InfoExtractor it
239         finds that reports being able to handle it. The InfoExtractor extracts
240         all the information about the video or videos the URL refers to, and
241         asks the FileDownloader to process the video information, possibly
242         downloading the video.
243
244         File downloaders accept a lot of parameters. In order not to saturate
245         the object constructor with arguments, it receives a dictionary of
246         options instead. These options are available through the params
247         attribute for the InfoExtractors to use. The FileDownloader also
248         registers itself as the downloader in charge for the InfoExtractors
249         that are added to it, so this is a "mutual registration".
250
251         Available options:
252
253         username:         Username for authentication purposes.
254         password:         Password for authentication purposes.
255         usenetrc:         Use netrc for authentication instead.
256         quiet:            Do not print messages to stdout.
257         forceurl:         Force printing final URL.
258         forcetitle:       Force printing title.
259         forcethumbnail:   Force printing thumbnail URL.
260         forcedescription: Force printing description.
261         forcefilename:    Force printing final filename.
262         simulate:         Do not download the video files.
263         format:           Video format code.
264         format_limit:     Highest quality format to try.
265         outtmpl:          Template for output names.
266         ignoreerrors:     Do not stop on download errors.
267         ratelimit:        Download speed limit, in bytes/sec.
268         nooverwrites:     Prevent overwriting files.
269         retries:          Number of times to retry for HTTP error 5xx
270         continuedl:       Try to continue downloads if possible.
271         noprogress:       Do not print the progress bar.
272         playliststart:    Playlist item to start at.
273         playlistend:      Playlist item to end at.
274         logtostderr:      Log messages to stderr instead of stdout.
275         consoletitle:     Display progress in console window's titlebar.
276         nopart:           Do not use temporary .part files.
277         """
278
        params = None                # Dictionary of downloader options (see class docstring)
        _ies = []                    # Registered InfoExtractors, tried in order
        _pps = []                    # Registered PostProcessors, run as a chain
        _download_retcode = None     # Process exit code (set to 1 on ignored errors)
        _num_downloads = None        # Counter backing the %(autonumber)s template field
        _screen_file = None          # Stream for status messages (stdout or stderr)
285
286         def __init__(self, params):
287                 """Create a FileDownloader object with the given options."""
288                 self._ies = []
289                 self._pps = []
290                 self._download_retcode = 0
291                 self._num_downloads = 0
292                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
293                 self.params = params
294
295         @staticmethod
296         def pmkdir(filename):
297                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
298                 components = filename.split(os.sep)
299                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
300                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
301                 for dir in aggregate:
302                         if not os.path.exists(dir):
303                                 os.mkdir(dir)
304
305         @staticmethod
306         def format_bytes(bytes):
307                 if bytes is None:
308                         return 'N/A'
309                 if type(bytes) is str:
310                         bytes = float(bytes)
311                 if bytes == 0.0:
312                         exponent = 0
313                 else:
314                         exponent = long(math.log(bytes, 1024.0))
315                 suffix = 'bkMGTPEZY'[exponent]
316                 converted = float(bytes) / float(1024**exponent)
317                 return '%.2f%s' % (converted, suffix)
318
319         @staticmethod
320         def calc_percent(byte_counter, data_len):
321                 if data_len is None:
322                         return '---.-%'
323                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
324
325         @staticmethod
326         def calc_eta(start, now, total, current):
327                 if total is None:
328                         return '--:--'
329                 dif = now - start
330                 if current == 0 or dif < 0.001: # One millisecond
331                         return '--:--'
332                 rate = float(current) / dif
333                 eta = long((float(total) - float(current)) / rate)
334                 (eta_mins, eta_secs) = divmod(eta, 60)
335                 if eta_mins > 99:
336                         return '--:--'
337                 return '%02d:%02d' % (eta_mins, eta_secs)
338
339         @staticmethod
340         def calc_speed(start, now, bytes):
341                 dif = now - start
342                 if bytes == 0 or dif < 0.001: # One millisecond
343                         return '%10s' % '---b/s'
344                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
345
346         @staticmethod
347         def best_block_size(elapsed_time, bytes):
348                 new_min = max(bytes / 2.0, 1.0)
349                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
350                 if elapsed_time < 0.001:
351                         return long(new_max)
352                 rate = bytes / elapsed_time
353                 if rate > new_max:
354                         return long(new_max)
355                 if rate < new_min:
356                         return long(new_min)
357                 return long(rate)
358
359         @staticmethod
360         def parse_bytes(bytestr):
361                 """Parse a string indicating a byte quantity into a long integer."""
362                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
363                 if matchobj is None:
364                         return None
365                 number = float(matchobj.group(1))
366                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
367                 return long(round(number * multiplier))
368
369         def add_info_extractor(self, ie):
370                 """Add an InfoExtractor object to the end of the list."""
371                 self._ies.append(ie)
372                 ie.set_downloader(self)
373
374         def add_post_processor(self, pp):
375                 """Add a PostProcessor object to the end of the chain."""
376                 self._pps.append(pp)
377                 pp.set_downloader(self)
378
        def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
                """Print message to the status stream (stdout, or stderr when
                'logtostderr' is set) unless in quiet mode."""
                try:
                        if not self.params.get('quiet', False):
                                # Trailing comma suppresses print's own newline;
                                # the terminator re-adds it unless skip_eol.
                                terminator = [u'\n', u''][skip_eol]
                                print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
                        self._screen_file.flush()
                except (UnicodeEncodeError), err:
                        # Swallow encoding failures only when the caller asked to.
                        if not ignore_encoding_errors:
                                raise
389
        def to_stderr(self, message):
                """Print message to stderr, encoded with the system's preferred encoding."""
                print >>sys.stderr, message.encode(preferredencoding())
393
        def to_cons_title(self, message):
                """Set console/terminal window title to message.

                No-op unless 'consoletitle' is enabled; uses the Win32 API on
                Windows and an xterm escape sequence elsewhere.
                """
                if not self.params.get('consoletitle', False):
                        return
                if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
                        # c_wchar_p() might not be necessary if `message` is
                        # already of type unicode()
                        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
                elif 'TERM' in os.environ:
                        # OSC 0 escape: set both icon name and window title.
                        sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
404
405         def fixed_template(self):
406                 """Checks if the output template is fixed."""
407                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
408
        def trouble(self, message=None):
                """Determine action to take when a download problem appears.

                Depending on if the downloader has been configured to ignore
                download errors or not, this method may throw an exception or
                not when errors are found, after printing the message.

                Raises:
                        DownloadError: when 'ignoreerrors' is not set.
                """
                if message is not None:
                        self.to_stderr(message)
                if not self.params.get('ignoreerrors', False):
                        raise DownloadError(message)
                # Ignoring the error: remember it so the final exit code is 1.
                self._download_retcode = 1
421
422         def slow_down(self, start_time, byte_counter):
423                 """Sleep if the download speed is over the rate limit."""
424                 rate_limit = self.params.get('ratelimit', None)
425                 if rate_limit is None or byte_counter == 0:
426                         return
427                 now = time.time()
428                 elapsed = now - start_time
429                 if elapsed <= 0.0:
430                         return
431                 speed = float(byte_counter) / elapsed
432                 if speed > rate_limit:
433                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
434
435         def temp_name(self, filename):
436                 """Returns a temporary filename for the given filename."""
437                 if self.params.get('nopart', False) or filename == u'-' or \
438                                 (os.path.exists(filename) and not os.path.isfile(filename)):
439                         return filename
440                 return filename + u'.part'
441
442         def undo_temp_name(self, filename):
443                 if filename.endswith(u'.part'):
444                         return filename[:-len(u'.part')]
445                 return filename
446
447         def try_rename(self, old_filename, new_filename):
448                 try:
449                         if old_filename == new_filename:
450                                 return
451                         os.rename(old_filename, new_filename)
452                 except (IOError, OSError), err:
453                         self.trouble(u'ERROR: unable to rename file')
454
        def report_destination(self, filename):
                """Report destination filename."""
                # Encoding errors are ignored: the filename here is informational.
                self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
458
        def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
                """Report download progress on one rewritten line and in the console title."""
                if self.params.get('noprogress', False):
                        return
                # Leading \r rewrites the same screen line on each update.
                self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
                                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
                self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                                (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
467
        def report_resuming_byte(self, resume_len):
                """Report attempt to resume the download at byte offset resume_len."""
                self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
471
        def report_retry(self, count, retries):
                """Report retry attempt count of retries in case of HTTP error 5xx."""
                self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
475
        def report_file_already_downloaded(self, file_name):
                """Report file has already been fully downloaded."""
                try:
                        self.to_screen(u'[download] %s has already been downloaded' % file_name)
                except (UnicodeEncodeError), err:
                        # Fall back to a message without the unencodable filename.
                        self.to_screen(u'[download] The file has already been downloaded')
482
        def report_unable_to_resume(self):
                """Report that it was impossible to resume the download; it restarts."""
                self.to_screen(u'[download] Unable to resume')
486
487         def report_finish(self):
488                 """Report download finished."""
489                 if self.params.get('noprogress', False):
490                         self.to_screen(u'[download] Download completed')
491                 else:
492                         self.to_screen(u'')
493
494         def increment_downloads(self):
495                 """Increment the ordinal that assigns a number to each file."""
496                 self._num_downloads += 1
497
498         def prepare_filename(self, info_dict):
499                 """Generate the output filename."""
500                 try:
501                         template_dict = dict(info_dict)
502                         template_dict['epoch'] = unicode(long(time.time()))
503                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
504                         filename = self.params['outtmpl'] % template_dict
505                         return filename
506                 except (ValueError, KeyError), err:
507                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
508                         return None
509
        def process_info(self, info_dict):
                """Process a single dictionary returned by an InfoExtractor.

                In simulate mode only the forced printings are emitted;
                otherwise the video is downloaded and postprocessed.

                Raises:
                        UnavailableVideoError: when _do_download fails with an
                        OS-level error.
                """
                filename = self.prepare_filename(info_dict)
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                        # Forced printings
                        if self.params.get('forcetitle', False):
                                print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forceurl', False):
                                print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
                                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcedescription', False) and 'description' in info_dict:
                                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcefilename', False) and filename is not None:
                                print filename.encode(preferredencoding(), 'xmlcharrefreplace')

                        return

                # prepare_filename() already reported the problem.
                if filename is None:
                        return
                if self.params.get('nooverwrites', False) and os.path.exists(filename):
                        self.to_stderr(u'WARNING: file exists and will be skipped')
                        return

                try:
                        self.pmkdir(filename)
                except (OSError, IOError), err:
                        self.trouble(u'ERROR: unable to create directories: %s' % str(err))
                        return

                try:
                        success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
                except (OSError, IOError), err:
                        # OS-level failure is treated as the format being unavailable.
                        raise UnavailableVideoError
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self.trouble(u'ERROR: unable to download video data: %s' % str(err))
                        return
                except (ContentTooShortError, ), err:
                        self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                        return

                if success:
                        try:
                                self.post_process(filename, info_dict)
                        except (PostProcessingError), err:
                                self.trouble(u'ERROR: postprocessing: %s' % str(err))
                                return
558
559         def download(self, url_list):
560                 """Download a given list of URLs."""
561                 if len(url_list) > 1 and self.fixed_template():
562                         raise SameFileError(self.params['outtmpl'])
563
564                 for url in url_list:
565                         suitable_found = False
566                         for ie in self._ies:
567                                 # Go to next InfoExtractor if not suitable
568                                 if not ie.suitable(url):
569                                         continue
570
571                                 # Suitable InfoExtractor found
572                                 suitable_found = True
573
574                                 # Extract information from URL and process it
575                                 ie.extract(url)
576
577                                 # Suitable InfoExtractor had been found; go to next URL
578                                 break
579
580                         if not suitable_found:
581                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
582
583                 return self._download_retcode
584
585         def post_process(self, filename, ie_info):
586                 """Run the postprocessing chain on the given file."""
587                 info = dict(ie_info)
588                 info['filepath'] = filename
589                 for pp in self._pps:
590                         info = pp.run(info)
591                         if info is None:
592                                 break
593
        def _download_with_rtmpdump(self, filename, url, player_url):
                """Download an rtmp:// URL by driving the external rtmpdump tool.

                Returns True on success, False on failure (after reporting).
                """
                self.report_destination(filename)
                tmpfilename = self.temp_name(filename)

                # Check for rtmpdump first
                try:
                        subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
                except (OSError, IOError):
                        self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
                        return False

                # Download using rtmpdump. rtmpdump returns exit code 2 when
                # the connection was interrumpted and resuming appears to be
                # possible. This is part of rtmpdump's normal usage, AFAIK.
                basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
                retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
                while retval == 2 or retval == 1:
                        prevsize = os.path.getsize(tmpfilename)
                        self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
                        time.sleep(5.0) # This seems to be needed
                        # Retry with resume (-e); after exit code 1 also pass '-k 1'.
                        retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
                        cursize = os.path.getsize(tmpfilename)
                        # No bytes gained and exit code 1: resuming is hopeless.
                        if prevsize == cursize and retval == 1:
                                break
                if retval == 0:
                        self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
                        self.try_rename(tmpfilename, filename)
                        return True
                else:
                        self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
                        return False
625
	def _do_download(self, filename, url, player_url):
		"""Download *url* into *filename* over HTTP (delegating rtmp:// URLs
		to rtmpdump).

		Honors self.params options: 'continuedl'/'nopart' (resume a .part
		file), 'retries' (connection attempts).  Returns True on success --
		including the "already fully downloaded" case -- and False on
		failure; raises ContentTooShortError when fewer bytes arrive than
		the server announced.
		"""
		# Check file already present
		if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
			self.report_file_already_downloaded(filename)
			return True

		# Attempt to download using rtmpdump
		if url.startswith('rtmp'):
			return self._download_with_rtmpdump(filename, url, player_url)

		tmpfilename = self.temp_name(filename)
		stream = None
		open_mode = 'wb'

		# Do not include the Accept-Encoding header
		headers = {'Youtubedl-no-compression': 'True'}
		# basic_request is kept Range-free so it can re-probe the full
		# length if the resume request is rejected with HTTP 416.
		basic_request = urllib2.Request(url, None, headers)
		request = urllib2.Request(url, None, headers)

		# Establish possible resume length
		if os.path.isfile(tmpfilename):
			resume_len = os.path.getsize(tmpfilename)
		else:
			resume_len = 0

		# Request parameters in case of being able to resume
		if self.params.get('continuedl', False) and resume_len != 0:
			self.report_resuming_byte(resume_len)
			request.add_header('Range','bytes=%d-' % resume_len)
			open_mode = 'ab'

		count = 0
		retries = self.params.get('retries', 0)
		while count <= retries:
			# Establish connection
			try:
				data = urllib2.urlopen(request)
				break
			except (urllib2.HTTPError, ), err:
				if (err.code < 500 or err.code >= 600) and err.code != 416:
					# Unexpected HTTP error
					raise
				elif err.code == 416:
					# Unable to resume (requested range not satisfiable)
					try:
						# Open the connection again without the range header
						data = urllib2.urlopen(basic_request)
						content_length = data.info()['Content-Length']
					except (urllib2.HTTPError, ), err:
						if err.code < 500 or err.code >= 600:
							raise
					else:
						# Examine the reported length
						if (content_length is not None and
						    (resume_len - 100 < long(content_length) < resume_len + 100)):
							# The file had already been fully downloaded.
							# Explanation to the above condition: in issue #175 it was revealed that
							# YouTube sometimes adds or removes a few bytes from the end of the file,
							# changing the file size slightly and causing problems for some users. So
							# I decided to implement a suggested change and consider the file
							# completely downloaded if the file size differs less than 100 bytes from
							# the one in the hard drive.
							self.report_file_already_downloaded(filename)
							self.try_rename(tmpfilename, filename)
							return True
						else:
							# The length does not match, we start the download over
							self.report_unable_to_resume()
							open_mode = 'wb'
							break
			# Retry
			count += 1
			if count <= retries:
				self.report_retry(count, retries)

		if count > retries:
			self.trouble(u'ERROR: giving up after %s retries' % retries)
			return False

		data_len = data.info().get('Content-length', None)
		if data_len is not None:
			# Server reports the remaining bytes; add what we already have.
			data_len = long(data_len) + resume_len
		data_len_str = self.format_bytes(data_len)
		byte_counter = 0 + resume_len
		block_size = 1024
		start = time.time()
		while True:
			# Download and write
			before = time.time()
			data_block = data.read(block_size)
			after = time.time()
			if len(data_block) == 0:
				break
			byte_counter += len(data_block)

			# Open file just in time (so a failed connection never leaves
			# an empty .part file behind)
			if stream is None:
				try:
					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
					filename = self.undo_temp_name(tmpfilename)
					self.report_destination(filename)
				except (OSError, IOError), err:
					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
					return False
			try:
				stream.write(data_block)
			except (IOError, OSError), err:
				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
				return False
			# Adapt the block size to the observed transfer speed.
			block_size = self.best_block_size(after - before, len(data_block))

			# Progress message
			percent_str = self.calc_percent(byte_counter, data_len)
			eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
			self.report_progress(percent_str, data_len_str, speed_str, eta_str)

			# Apply rate limit
			self.slow_down(start, byte_counter - resume_len)

		stream.close()
		self.report_finish()
		if data_len is not None and byte_counter != data_len:
			raise ContentTooShortError(byte_counter, long(data_len))
		self.try_rename(tmpfilename, filename)
		return True
752
class InfoExtractor(object):
	"""Common base class for site-specific information extractors.

	Given a URL, an extractor produces one dictionary per video with the
	details the FileDownloader needs (real video URL, title, and so on);
	the downloader then acts on each dictionary, typically by downloading
	the video.  Every dictionary must carry the following keys:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.
	format:         Video format.
	player_url:     SWF Player URL (may be None).

	Two further keys are optional; they exist mainly so youtube-dl can
	back a video search frontend (such as youtube2mp3) and are consulted
	only by the corresponding forced-printing options:

	thumbnail:      Full URL to a video thumbnail image.
	description:    One-line video description.

	Concrete subclasses should redefine _real_initialize() and
	_real_extract(), plus the static suitable() predicate, and are
	normally instantiated and registered with the main downloader.
	"""

	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Create the extractor, optionally attaching a downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Tell whether this extractor can handle the given URL."""
		# The base class handles nothing; subclasses override this.
		return False

	def initialize(self):
		"""Run one-time setup (authentication, etc.) on first use."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Initialize if needed, then extract information for *url*."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Attach the downloader this extractor reports through."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Actual setup logic; subclasses redefine as needed."""
		pass

	def _real_extract(self, url):
		"""Actual extraction logic; subclasses redefine as needed."""
		pass
823
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com."""

	# Group 1 captures the URL prefix (if any) and group 2 the video id.
	# The conditional group (?(1).+)? permits trailing junk only when a
	# real URL prefix was present: a bare id must match exactly.
	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
	# Visiting this URL pins the site language to English so the HTML
	# scraping patterns below match reliably.
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	# Machine entry looked up in ~/.netrc when the netrc option is used.
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality
	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
	# Maps a YouTube format ("fmt") code to a filename extension; codes
	# missing here fall back to 'flv' in _real_extract().
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'45': 'webm',
	}

	@staticmethod
	def suitable(url):
		"""Return True if this extractor can handle the given URL."""
		return (re.match(YoutubeIE._VALID_URL, url) is not None)

	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')

	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[youtube] Confirming age')

	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

	def report_unavailable_format(self, video_id, format):
		"""Report extracted video URL."""
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		self._downloader.to_screen(u'[youtube] RTMP download detected')

	def _real_initialize(self):
		"""One-time setup: force the site language to English, then --
		when credentials come from the options or .netrc -- log in and
		confirm age.  All failures except age confirmation are reported
		as warnings and abort setup without aborting the program."""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		request = urllib2.Request(self._LANG_URL)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':		'/',
				'action_login':	'Log In',
				'username':	username,
				'password':	password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present, the credentials were rejected.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
				'next_url':		'/',
				'action_confirm':	'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Download the watch page and get_video_info data for *url*, then
		hand one info dictionary per selected format to the downloader."""
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		video_id = mobj.group(2)

		# Get video webpage
		self.report_video_webpage_download(video_id)
		# NOTE(review): the literal '&amp;' before has_verified looks like
		# an HTML-escaping slip (plain '&' intended) -- confirm before
		# changing, since the page still loads either way.
		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
		try:
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Attempt to extract SWF player URL
		mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
		if mobj is not None:
			# Undo the JSON-style backslash escaping in the matched URL.
			player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
		else:
			player_url = None

		# Get video info
		self.report_video_info_webpage_download(video_id)
		# Try several 'el' variants; the first response carrying a 'token'
		# parameter wins.
		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
					   % (video_id, el_type))
			request = urllib2.Request(video_info_url)
			try:
				video_info_webpage = urllib2.urlopen(request).read()
				video_info = parse_qs(video_info_webpage)
				if 'token' in video_info:
					break
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
				return
		if 'token' not in video_info:
			if 'reason' in video_info:
				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
			else:
				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# uploader
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title: collapse every run of non-alphanumerics to '_'
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image
		if 'thumbnail_url' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else:	# don't panic if we can't find it
			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

		# upload date
		upload_date = u'NA'
		mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
		if mobj is not None:
			upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
			format_expressions = ['%d %B %Y', '%B %d %Y']
			# NOTE(review): there is no break after a successful parse.
			# Harmless in practice -- re-parsing the already-normalized
			# value fails and is swallowed -- but a break would be clearer.
			for expression in format_expressions:
				try:
					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
				except:
					pass

		# description (only scraped when the caller asked to print it)
		video_description = 'No description available.'
		if self._downloader.params.get('forcedescription', False):
			mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
			if mobj is not None:
				video_description = mobj.group(1)

		# token
		video_token = urllib.unquote_plus(video_info['token'][0])

		# Decide which formats to download
		req_format = self._downloader.params.get('format', None)

		if 'fmt_url_map' in video_info:
			# fmt_url_map is a comma-separated list of 'fmt|url' pairs.
			url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
			format_limit = self._downloader.params.get('format_limit', None)
			if format_limit is not None and format_limit in self._available_formats:
				# Only consider formats at or below the requested quality.
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if req_format is None:
				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
			elif req_format == '-1':
				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
			else:
				# Specific format
				if req_format not in url_map:
					self._downloader.trouble(u'ERROR: requested format not available')
					return
				video_url_list = [(req_format, url_map[req_format])] # Specific format

		elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_url_list = [(None, video_info['conn'][0])]

		else:
			self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
			return

		for format_param, video_real_url in video_url_list:
			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'flv')

			# Find the video URL in fmt_url_map or conn paramters
			try:
				# Process video information
				self._downloader.process_info({
					'id':		video_id.decode('utf-8'),
					'url':		video_real_url.decode('utf-8'),
					'uploader':	video_uploader.decode('utf-8'),
					'upload_date':	upload_date,
					'title':	video_title,
					'stitle':	simple_title,
					'ext':		video_extension.decode('utf-8'),
					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail':	video_thumbnail.decode('utf-8'),
					'description':	video_description.decode('utf-8'),
					'player_url':	player_url,
				})
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download video')
1103
1104
class MetacafeIE(InfoExtractor):
	"""Information Extractor for metacafe.com."""

	# Group 1 is the video id, group 2 the URL's simplified-title slug.
	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	# YoutubeIE instance used for metacafe ids of the form 'yt-...'.
	_youtube_ie = None

	def __init__(self, youtube_ie, downloader=None):
		"""Constructor. Stores the YoutubeIE used for delegated videos."""
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	@staticmethod
	def suitable(url):
		"""Return True if this extractor can handle the given URL."""
		return (re.match(MetacafeIE._VALID_URL, url) is not None)

	def report_disclaimer(self):
		"""Report disclaimer retrieval."""
		self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[metacafe] Confirming age')

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

	def _real_initialize(self):
		"""Fetch the family-filter disclaimer page and POST past it so
		age-restricted videos become reachable for this session."""
		# Retrieve disclaimer
		request = urllib2.Request(self._DISCLAIMER)
		try:
			self.report_disclaimer()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
			return

		# Confirm age
		disclaimer_form = {
			'filters': '0',
			'submit': "Continue - I'm over 18",
			}
		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
		try:
			self.report_age_confirmation()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Scrape the watch page for the media URL and metadata, delegating
		'yt-' prefixed ids to the stored YouTube extractor."""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		video_id = mobj.group(1)

		# Check if video comes from YouTube
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
			return

		# At this point we have a new video
		self._downloader.increment_downloads()

		simple_title = mobj.group(2).decode('utf-8')

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
		if mobj is not None:
			# Page variant with the media URL as a plain query parameter.
			mediaURL = urllib.unquote(mobj.group(1))
			video_extension = mediaURL[-3:]

			# Extract gdaKey if available
			mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
			if mobj is None:
				video_url = mediaURL
			else:
				gdaKey = mobj.group(1)
				video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
		else:
			# Page variant with the URL and key inside the flashvars blob.
			mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			vardict = parse_qs(mobj.group(1))
			if 'mediaData' not in vardict:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mediaURL = mobj.group(1).replace('\\/', '/')
			video_extension = mediaURL[-3:]
			video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	video_uploader.decode('utf-8'),
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1248
1249
1250 class DailymotionIE(InfoExtractor):
1251         """Information Extractor for Dailymotion"""
1252
1253         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1254
1255         def __init__(self, downloader=None):
1256                 InfoExtractor.__init__(self, downloader)
1257
1258         @staticmethod
1259         def suitable(url):
1260                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1261
1262         def report_download_webpage(self, video_id):
1263                 """Report webpage download."""
1264                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1265
1266         def report_extraction(self, video_id):
1267                 """Report information extraction."""
1268                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1269
1270         def _real_initialize(self):
1271                 return
1272
1273         def _real_extract(self, url):
1274                 # Extract id and simplified title from URL
1275                 mobj = re.match(self._VALID_URL, url)
1276                 if mobj is None:
1277                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1278                         return
1279
1280                 # At this point we have a new video
1281                 self._downloader.increment_downloads()
1282                 video_id = mobj.group(1)
1283
1284                 simple_title = mobj.group(2).decode('utf-8')
1285                 video_extension = 'flv'
1286
1287                 # Retrieve video webpage to extract further information
1288                 request = urllib2.Request(url)
1289                 try:
1290                         self.report_download_webpage(video_id)
1291                         webpage = urllib2.urlopen(request).read()
1292                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1293                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1294                         return
1295
1296                 # Extract URL, uploader and title from webpage
1297                 self.report_extraction(video_id)
1298                 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1299                 if mobj is None:
1300                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1301                         return
1302                 mediaURL = urllib.unquote(mobj.group(1))
1303
1304                 # if needed add http://www.dailymotion.com/ if relative URL
1305
1306                 video_url = mediaURL
1307
1308                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1309                 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1310                 if mobj is None:
1311                         self._downloader.trouble(u'ERROR: unable to extract title')
1312                         return
1313                 video_title = mobj.group(1).decode('utf-8')
1314                 video_title = sanitize_title(video_title)
1315
1316                 mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
1317                 if mobj is None:
1318                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1319                         return
1320                 video_uploader = mobj.group(1)
1321
1322                 try:
1323                         # Process video information
1324                         self._downloader.process_info({
1325                                 'id':           video_id.decode('utf-8'),
1326                                 'url':          video_url.decode('utf-8'),
1327                                 'uploader':     video_uploader.decode('utf-8'),
1328                                 'upload_date':  u'NA',
1329                                 'title':        video_title,
1330                                 'stitle':       simple_title,
1331                                 'ext':          video_extension.decode('utf-8'),
1332                                 'format':       u'NA',
1333                                 'player_url':   None,
1334                         })
1335                 except UnavailableVideoError:
1336                         self._downloader.trouble(u'\nERROR: unable to download video')
1337
1338 class GoogleIE(InfoExtractor):
1339         """Information extractor for video.google.com."""
1340
1341         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1342
1343         def __init__(self, downloader=None):
1344                 InfoExtractor.__init__(self, downloader)
1345
1346         @staticmethod
1347         def suitable(url):
1348                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1349
1350         def report_download_webpage(self, video_id):
1351                 """Report webpage download."""
1352                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1353
1354         def report_extraction(self, video_id):
1355                 """Report information extraction."""
1356                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1357
1358         def _real_initialize(self):
1359                 return
1360
1361         def _real_extract(self, url):
1362                 # Extract id from URL
1363                 mobj = re.match(self._VALID_URL, url)
1364                 if mobj is None:
1365                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1366                         return
1367
1368                 # At this point we have a new video
1369                 self._downloader.increment_downloads()
1370                 video_id = mobj.group(1)
1371
1372                 video_extension = 'mp4'
1373
1374                 # Retrieve video webpage to extract further information
1375                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1376                 try:
1377                         self.report_download_webpage(video_id)
1378                         webpage = urllib2.urlopen(request).read()
1379                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1380                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1381                         return
1382
1383                 # Extract URL, uploader, and title from webpage
1384                 self.report_extraction(video_id)
1385                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1386                 if mobj is None:
1387                         video_extension = 'flv'
1388                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1389                 if mobj is None:
1390                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1391                         return
1392                 mediaURL = urllib.unquote(mobj.group(1))
1393                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1394                 mediaURL = mediaURL.replace('\\x26', '\x26')
1395
1396                 video_url = mediaURL
1397
1398                 mobj = re.search(r'<title>(.*)</title>', webpage)
1399                 if mobj is None:
1400                         self._downloader.trouble(u'ERROR: unable to extract title')
1401                         return
1402                 video_title = mobj.group(1).decode('utf-8')
1403                 video_title = sanitize_title(video_title)
1404                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1405
1406                 # Extract video description
1407                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1408                 if mobj is None:
1409                         self._downloader.trouble(u'ERROR: unable to extract video description')
1410                         return
1411                 video_description = mobj.group(1).decode('utf-8')
1412                 if not video_description:
1413                         video_description = 'No description available.'
1414
1415                 # Extract video thumbnail
1416                 if self._downloader.params.get('forcethumbnail', False):
1417                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1418                         try:
1419                                 webpage = urllib2.urlopen(request).read()
1420                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1421                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1422                                 return
1423                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1424                         if mobj is None:
1425                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1426                                 return
1427                         video_thumbnail = mobj.group(1)
1428                 else:   # we need something to pass to process_info
1429                         video_thumbnail = ''
1430
1431
1432                 try:
1433                         # Process video information
1434                         self._downloader.process_info({
1435                                 'id':           video_id.decode('utf-8'),
1436                                 'url':          video_url.decode('utf-8'),
1437                                 'uploader':     u'NA',
1438                                 'upload_date':  u'NA',
1439                                 'title':        video_title,
1440                                 'stitle':       simple_title,
1441                                 'ext':          video_extension.decode('utf-8'),
1442                                 'format':       u'NA',
1443                                 'player_url':   None,
1444                         })
1445                 except UnavailableVideoError:
1446                         self._downloader.trouble(u'\nERROR: unable to download video')
1447
1448
1449 class PhotobucketIE(InfoExtractor):
1450         """Information extractor for photobucket.com."""
1451
1452         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1453
1454         def __init__(self, downloader=None):
1455                 InfoExtractor.__init__(self, downloader)
1456
1457         @staticmethod
1458         def suitable(url):
1459                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1460
1461         def report_download_webpage(self, video_id):
1462                 """Report webpage download."""
1463                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1464
1465         def report_extraction(self, video_id):
1466                 """Report information extraction."""
1467                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1468
1469         def _real_initialize(self):
1470                 return
1471
1472         def _real_extract(self, url):
1473                 # Extract id from URL
1474                 mobj = re.match(self._VALID_URL, url)
1475                 if mobj is None:
1476                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1477                         return
1478
1479                 # At this point we have a new video
1480                 self._downloader.increment_downloads()
1481                 video_id = mobj.group(1)
1482
1483                 video_extension = 'flv'
1484
1485                 # Retrieve video webpage to extract further information
1486                 request = urllib2.Request(url)
1487                 try:
1488                         self.report_download_webpage(video_id)
1489                         webpage = urllib2.urlopen(request).read()
1490                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1491                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1492                         return
1493
1494                 # Extract URL, uploader, and title from webpage
1495                 self.report_extraction(video_id)
1496                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1497                 if mobj is None:
1498                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1499                         return
1500                 mediaURL = urllib.unquote(mobj.group(1))
1501
1502                 video_url = mediaURL
1503
1504                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1505                 if mobj is None:
1506                         self._downloader.trouble(u'ERROR: unable to extract title')
1507                         return
1508                 video_title = mobj.group(1).decode('utf-8')
1509                 video_title = sanitize_title(video_title)
1510                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1511
1512                 video_uploader = mobj.group(2).decode('utf-8')
1513
1514                 try:
1515                         # Process video information
1516                         self._downloader.process_info({
1517                                 'id':           video_id.decode('utf-8'),
1518                                 'url':          video_url.decode('utf-8'),
1519                                 'uploader':     video_uploader,
1520                                 'upload_date':  u'NA',
1521                                 'title':        video_title,
1522                                 'stitle':       simple_title,
1523                                 'ext':          video_extension.decode('utf-8'),
1524                                 'format':       u'NA',
1525                                 'player_url':   None,
1526                         })
1527                 except UnavailableVideoError:
1528                         self._downloader.trouble(u'\nERROR: unable to download video')
1529
1530
1531 class YahooIE(InfoExtractor):
1532         """Information extractor for video.yahoo.com."""
1533
1534         # _VALID_URL matches all Yahoo! Video URLs
1535         # _VPAGE_URL matches only the extractable '/watch/' URLs
1536         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1537         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1538
1539         def __init__(self, downloader=None):
1540                 InfoExtractor.__init__(self, downloader)
1541
1542         @staticmethod
1543         def suitable(url):
1544                 return (re.match(YahooIE._VALID_URL, url) is not None)
1545
1546         def report_download_webpage(self, video_id):
1547                 """Report webpage download."""
1548                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1549
1550         def report_extraction(self, video_id):
1551                 """Report information extraction."""
1552                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1553
1554         def _real_initialize(self):
1555                 return
1556
1557         def _real_extract(self, url, new_video=True):
1558                 # Extract ID from URL
1559                 mobj = re.match(self._VALID_URL, url)
1560                 if mobj is None:
1561                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1562                         return
1563
1564                 # At this point we have a new video
1565                 self._downloader.increment_downloads()
1566                 video_id = mobj.group(2)
1567                 video_extension = 'flv'
1568
1569                 # Rewrite valid but non-extractable URLs as
1570                 # extractable English language /watch/ URLs
1571                 if re.match(self._VPAGE_URL, url) is None:
1572                         request = urllib2.Request(url)
1573                         try:
1574                                 webpage = urllib2.urlopen(request).read()
1575                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1576                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1577                                 return
1578
1579                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1580                         if mobj is None:
1581                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1582                                 return
1583                         yahoo_id = mobj.group(1)
1584
1585                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1586                         if mobj is None:
1587                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1588                                 return
1589                         yahoo_vid = mobj.group(1)
1590
1591                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1592                         return self._real_extract(url, new_video=False)
1593
1594                 # Retrieve video webpage to extract further information
1595                 request = urllib2.Request(url)
1596                 try:
1597                         self.report_download_webpage(video_id)
1598                         webpage = urllib2.urlopen(request).read()
1599                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1600                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1601                         return
1602
1603                 # Extract uploader and title from webpage
1604                 self.report_extraction(video_id)
1605                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1606                 if mobj is None:
1607                         self._downloader.trouble(u'ERROR: unable to extract video title')
1608                         return
1609                 video_title = mobj.group(1).decode('utf-8')
1610                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1611
1612                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1613                 if mobj is None:
1614                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1615                         return
1616                 video_uploader = mobj.group(1).decode('utf-8')
1617
1618                 # Extract video thumbnail
1619                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1620                 if mobj is None:
1621                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1622                         return
1623                 video_thumbnail = mobj.group(1).decode('utf-8')
1624
1625                 # Extract video description
1626                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1627                 if mobj is None:
1628                         self._downloader.trouble(u'ERROR: unable to extract video description')
1629                         return
1630                 video_description = mobj.group(1).decode('utf-8')
1631                 if not video_description: video_description = 'No description available.'
1632
1633                 # Extract video height and width
1634                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1635                 if mobj is None:
1636                         self._downloader.trouble(u'ERROR: unable to extract video height')
1637                         return
1638                 yv_video_height = mobj.group(1)
1639
1640                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1641                 if mobj is None:
1642                         self._downloader.trouble(u'ERROR: unable to extract video width')
1643                         return
1644                 yv_video_width = mobj.group(1)
1645
1646                 # Retrieve video playlist to extract media URL
1647                 # I'm not completely sure what all these options are, but we
1648                 # seem to need most of them, otherwise the server sends a 401.
1649                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1650                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1651                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1652                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1653                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1654                 try:
1655                         self.report_download_webpage(video_id)
1656                         webpage = urllib2.urlopen(request).read()
1657                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1658                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1659                         return
1660
1661                 # Extract media URL from playlist XML
1662                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1663                 if mobj is None:
1664                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1665                         return
1666                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1667                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1668
1669                 try:
1670                         # Process video information
1671                         self._downloader.process_info({
1672                                 'id':           video_id.decode('utf-8'),
1673                                 'url':          video_url,
1674                                 'uploader':     video_uploader,
1675                                 'upload_date':  u'NA',
1676                                 'title':        video_title,
1677                                 'stitle':       simple_title,
1678                                 'ext':          video_extension.decode('utf-8'),
1679                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1680                                 'description':  video_description,
1681                                 'thumbnail':    video_thumbnail,
1682                                 'description':  video_description,
1683                                 'player_url':   None,
1684                         })
1685                 except UnavailableVideoError:
1686                         self._downloader.trouble(u'\nERROR: unable to download video')
1687
1688
1689 class GenericIE(InfoExtractor):
1690         """Generic last-resort information extractor."""
1691
1692         def __init__(self, downloader=None):
1693                 InfoExtractor.__init__(self, downloader)
1694
1695         @staticmethod
1696         def suitable(url):
1697                 return True
1698
1699         def report_download_webpage(self, video_id):
1700                 """Report webpage download."""
1701                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1702                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1703
1704         def report_extraction(self, video_id):
1705                 """Report information extraction."""
1706                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1707
1708         def _real_initialize(self):
1709                 return
1710
1711         def _real_extract(self, url):
1712                 # At this point we have a new video
1713                 self._downloader.increment_downloads()
1714
1715                 video_id = url.split('/')[-1]
1716                 request = urllib2.Request(url)
1717                 try:
1718                         self.report_download_webpage(video_id)
1719                         webpage = urllib2.urlopen(request).read()
1720                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1721                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1722                         return
1723                 except ValueError, err:
1724                         # since this is the last-resort InfoExtractor, if
1725                         # this error is thrown, it'll be thrown here
1726                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1727                         return
1728
1729                 self.report_extraction(video_id)
1730                 # Start with something easy: JW Player in SWFObject
1731                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1732                 if mobj is None:
1733                         # Broaden the search a little bit
1734                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1735                 if mobj is None:
1736                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1737                         return
1738
1739                 # It's possible that one of the regexes
1740                 # matched, but returned an empty group:
1741                 if mobj.group(1) is None:
1742                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1743                         return
1744
1745                 video_url = urllib.unquote(mobj.group(1))
1746                 video_id  = os.path.basename(video_url)
1747
1748                 # here's a fun little line of code for you:
1749                 video_extension = os.path.splitext(video_id)[1][1:]
1750                 video_id        = os.path.splitext(video_id)[0]
1751
1752                 # it's tempting to parse this further, but you would
1753                 # have to take into account all the variations like
1754                 #   Video Title - Site Name
1755                 #   Site Name | Video Title
1756                 #   Video Title - Tagline | Site Name
1757                 # and so on and so forth; it's just not practical
1758                 mobj = re.search(r'<title>(.*)</title>', webpage)
1759                 if mobj is None:
1760                         self._downloader.trouble(u'ERROR: unable to extract title')
1761                         return
1762                 video_title = mobj.group(1).decode('utf-8')
1763                 video_title = sanitize_title(video_title)
1764                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1765
1766                 # video uploader is domain name
1767                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1768                 if mobj is None:
1769                         self._downloader.trouble(u'ERROR: unable to extract title')
1770                         return
1771                 video_uploader = mobj.group(1).decode('utf-8')
1772
1773                 try:
1774                         # Process video information
1775                         self._downloader.process_info({
1776                                 'id':           video_id.decode('utf-8'),
1777                                 'url':          video_url.decode('utf-8'),
1778                                 'uploader':     video_uploader,
1779                                 'upload_date':  u'NA',
1780                                 'title':        video_title,
1781                                 'stitle':       simple_title,
1782                                 'ext':          video_extension.decode('utf-8'),
1783                                 'format':       u'NA',
1784                                 'player_url':   None,
1785                         })
1786                 except UnavailableVideoError, err:
1787                         self._downloader.trouble(u'\nERROR: unable to download video')
1788
1789
1790 class YoutubeSearchIE(InfoExtractor):
1791         """Information Extractor for YouTube search queries."""
1792         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1793         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1794         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1795         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1796         _youtube_ie = None
1797         _max_youtube_results = 1000
1798
1799         def __init__(self, youtube_ie, downloader=None):
1800                 InfoExtractor.__init__(self, downloader)
1801                 self._youtube_ie = youtube_ie
1802
1803         @staticmethod
1804         def suitable(url):
1805                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1806
1807         def report_download_page(self, query, pagenum):
1808                 """Report attempt to download playlist page with given number."""
1809                 query = query.decode(preferredencoding())
1810                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1811
1812         def _real_initialize(self):
1813                 self._youtube_ie.initialize()
1814
1815         def _real_extract(self, query):
1816                 mobj = re.match(self._VALID_QUERY, query)
1817                 if mobj is None:
1818                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1819                         return
1820
1821                 prefix, query = query.split(':')
1822                 prefix = prefix[8:]
1823                 query  = query.encode('utf-8')
1824                 if prefix == '':
1825                         self._download_n_results(query, 1)
1826                         return
1827                 elif prefix == 'all':
1828                         self._download_n_results(query, self._max_youtube_results)
1829                         return
1830                 else:
1831                         try:
1832                                 n = long(prefix)
1833                                 if n <= 0:
1834                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1835                                         return
1836                                 elif n > self._max_youtube_results:
1837                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1838                                         n = self._max_youtube_results
1839                                 self._download_n_results(query, n)
1840                                 return
1841                         except ValueError: # parsing prefix as integer fails
1842                                 self._download_n_results(query, 1)
1843                                 return
1844
1845         def _download_n_results(self, query, n):
1846                 """Downloads a specified number of results for a query"""
1847
1848                 video_ids = []
1849                 already_seen = set()
1850                 pagenum = 1
1851
1852                 while True:
1853                         self.report_download_page(query, pagenum)
1854                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1855                         request = urllib2.Request(result_url)
1856                         try:
1857                                 page = urllib2.urlopen(request).read()
1858                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1859                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1860                                 return
1861
1862                         # Extract video identifiers
1863                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1864                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1865                                 if video_id not in already_seen:
1866                                         video_ids.append(video_id)
1867                                         already_seen.add(video_id)
1868                                         if len(video_ids) == n:
1869                                                 # Specified n videos reached
1870                                                 for id in video_ids:
1871                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1872                                                 return
1873
1874                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1875                                 for id in video_ids:
1876                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1877                                 return
1878
1879                         pagenum = pagenum + 1
1880
1881 class GoogleSearchIE(InfoExtractor):
1882         """Information Extractor for Google Video search queries."""
1883         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1884         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1885         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1886         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1887         _google_ie = None
1888         _max_google_results = 1000
1889
1890         def __init__(self, google_ie, downloader=None):
1891                 InfoExtractor.__init__(self, downloader)
1892                 self._google_ie = google_ie
1893
1894         @staticmethod
1895         def suitable(url):
1896                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1897
1898         def report_download_page(self, query, pagenum):
1899                 """Report attempt to download playlist page with given number."""
1900                 query = query.decode(preferredencoding())
1901                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1902
1903         def _real_initialize(self):
1904                 self._google_ie.initialize()
1905
1906         def _real_extract(self, query):
1907                 mobj = re.match(self._VALID_QUERY, query)
1908                 if mobj is None:
1909                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1910                         return
1911
1912                 prefix, query = query.split(':')
1913                 prefix = prefix[8:]
1914                 query  = query.encode('utf-8')
1915                 if prefix == '':
1916                         self._download_n_results(query, 1)
1917                         return
1918                 elif prefix == 'all':
1919                         self._download_n_results(query, self._max_google_results)
1920                         return
1921                 else:
1922                         try:
1923                                 n = long(prefix)
1924                                 if n <= 0:
1925                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1926                                         return
1927                                 elif n > self._max_google_results:
1928                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1929                                         n = self._max_google_results
1930                                 self._download_n_results(query, n)
1931                                 return
1932                         except ValueError: # parsing prefix as integer fails
1933                                 self._download_n_results(query, 1)
1934                                 return
1935
1936         def _download_n_results(self, query, n):
1937                 """Downloads a specified number of results for a query"""
1938
1939                 video_ids = []
1940                 already_seen = set()
1941                 pagenum = 1
1942
1943                 while True:
1944                         self.report_download_page(query, pagenum)
1945                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1946                         request = urllib2.Request(result_url)
1947                         try:
1948                                 page = urllib2.urlopen(request).read()
1949                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1950                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1951                                 return
1952
1953                         # Extract video identifiers
1954                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1955                                 video_id = mobj.group(1)
1956                                 if video_id not in already_seen:
1957                                         video_ids.append(video_id)
1958                                         already_seen.add(video_id)
1959                                         if len(video_ids) == n:
1960                                                 # Specified n videos reached
1961                                                 for id in video_ids:
1962                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1963                                                 return
1964
1965                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1966                                 for id in video_ids:
1967                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1968                                 return
1969
1970                         pagenum = pagenum + 1
1971
1972 class YahooSearchIE(InfoExtractor):
1973         """Information Extractor for Yahoo! Video search queries."""
1974         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1975         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1976         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1977         _MORE_PAGES_INDICATOR = r'\s*Next'
1978         _yahoo_ie = None
1979         _max_yahoo_results = 1000
1980
1981         def __init__(self, yahoo_ie, downloader=None):
1982                 InfoExtractor.__init__(self, downloader)
1983                 self._yahoo_ie = yahoo_ie
1984
1985         @staticmethod
1986         def suitable(url):
1987                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1988
1989         def report_download_page(self, query, pagenum):
1990                 """Report attempt to download playlist page with given number."""
1991                 query = query.decode(preferredencoding())
1992                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1993
1994         def _real_initialize(self):
1995                 self._yahoo_ie.initialize()
1996
1997         def _real_extract(self, query):
1998                 mobj = re.match(self._VALID_QUERY, query)
1999                 if mobj is None:
2000                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2001                         return
2002
2003                 prefix, query = query.split(':')
2004                 prefix = prefix[8:]
2005                 query  = query.encode('utf-8')
2006                 if prefix == '':
2007                         self._download_n_results(query, 1)
2008                         return
2009                 elif prefix == 'all':
2010                         self._download_n_results(query, self._max_yahoo_results)
2011                         return
2012                 else:
2013                         try:
2014                                 n = long(prefix)
2015                                 if n <= 0:
2016                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2017                                         return
2018                                 elif n > self._max_yahoo_results:
2019                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
2020                                         n = self._max_yahoo_results
2021                                 self._download_n_results(query, n)
2022                                 return
2023                         except ValueError: # parsing prefix as integer fails
2024                                 self._download_n_results(query, 1)
2025                                 return
2026
2027         def _download_n_results(self, query, n):
2028                 """Downloads a specified number of results for a query"""
2029
2030                 video_ids = []
2031                 already_seen = set()
2032                 pagenum = 1
2033
2034                 while True:
2035                         self.report_download_page(query, pagenum)
2036                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2037                         request = urllib2.Request(result_url)
2038                         try:
2039                                 page = urllib2.urlopen(request).read()
2040                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2041                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2042                                 return
2043
2044                         # Extract video identifiers
2045                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2046                                 video_id = mobj.group(1)
2047                                 if video_id not in already_seen:
2048                                         video_ids.append(video_id)
2049                                         already_seen.add(video_id)
2050                                         if len(video_ids) == n:
2051                                                 # Specified n videos reached
2052                                                 for id in video_ids:
2053                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2054                                                 return
2055
2056                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2057                                 for id in video_ids:
2058                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2059                                 return
2060
2061                         pagenum = pagenum + 1
2062
2063 class YoutubePlaylistIE(InfoExtractor):
2064         """Information Extractor for YouTube playlists."""
2065
2066         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*'
2067         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
2068         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2069         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2070         _youtube_ie = None
2071
2072         def __init__(self, youtube_ie, downloader=None):
2073                 InfoExtractor.__init__(self, downloader)
2074                 self._youtube_ie = youtube_ie
2075
2076         @staticmethod
2077         def suitable(url):
2078                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2079
2080         def report_download_page(self, playlist_id, pagenum):
2081                 """Report attempt to download playlist page with given number."""
2082                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2083
2084         def _real_initialize(self):
2085                 self._youtube_ie.initialize()
2086
2087         def _real_extract(self, url):
2088                 # Extract playlist id
2089                 mobj = re.match(self._VALID_URL, url)
2090                 if mobj is None:
2091                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2092                         return
2093
2094                 # Download playlist pages
2095                 playlist_id = mobj.group(1)
2096                 video_ids = []
2097                 pagenum = 1
2098
2099                 while True:
2100                         self.report_download_page(playlist_id, pagenum)
2101                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum))
2102                         try:
2103                                 page = urllib2.urlopen(request).read()
2104                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2105                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2106                                 return
2107
2108                         # Extract video identifiers
2109                         ids_in_page = []
2110                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2111                                 if mobj.group(1) not in ids_in_page:
2112                                         ids_in_page.append(mobj.group(1))
2113                         video_ids.extend(ids_in_page)
2114
2115                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2116                                 break
2117                         pagenum = pagenum + 1
2118
2119                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2120                 playlistend = self._downloader.params.get('playlistend', -1)
2121                 video_ids = video_ids[playliststart:playlistend]
2122
2123                 for id in video_ids:
2124                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2125                 return
2126
2127 class YoutubeUserIE(InfoExtractor):
2128         """Information Extractor for YouTube users."""
2129
2130         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
2131         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2132         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
2133         _youtube_ie = None
2134
2135         def __init__(self, youtube_ie, downloader=None):
2136                 InfoExtractor.__init__(self, downloader)
2137                 self._youtube_ie = youtube_ie
2138
2139         @staticmethod
2140         def suitable(url):
2141                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2142
2143         def report_download_page(self, username):
2144                 """Report attempt to download user page."""
2145                 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username))
2146
2147         def _real_initialize(self):
2148                 self._youtube_ie.initialize()
2149
2150         def _real_extract(self, url):
2151                 # Extract username
2152                 mobj = re.match(self._VALID_URL, url)
2153                 if mobj is None:
2154                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2155                         return
2156
2157                 # Download user page
2158                 username = mobj.group(1)
2159                 video_ids = []
2160                 pagenum = 1
2161
2162                 self.report_download_page(username)
2163                 request = urllib2.Request(self._TEMPLATE_URL % (username))
2164                 try:
2165                         page = urllib2.urlopen(request).read()
2166                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2167                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2168                         return
2169
2170                 # Extract video identifiers
2171                 ids_in_page = []
2172
2173                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2174                         if mobj.group(1) not in ids_in_page:
2175                                 ids_in_page.append(mobj.group(1))
2176                 video_ids.extend(ids_in_page)
2177
2178                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2179                 playlistend = self._downloader.params.get('playlistend', -1)
2180                 video_ids = video_ids[playliststart:playlistend]
2181
2182                 for id in video_ids:
2183                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2184                 return
2185
class DepositFilesIE(InfoExtractor):
	"""Information extractor for depositfiles.com

	Rebuilds the URL in the English locale, requests the page as if the
	'Free download' button had been pressed, then scrapes the real file
	URL and title out of the returned HTML.
	"""

	# The optional two-character path segment before /files/ is the site
	# locale (e.g. /en/, /de/); it is matched but not captured.
	_VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		"""Return True if this extractor can handle the given URL."""
		return (re.match(DepositFilesIE._VALID_URL, url) is not None)

	def report_download_webpage(self, file_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)

	def report_extraction(self, file_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)

	def _real_initialize(self):
		# Nothing to initialize for this extractor.
		return

	def _real_extract(self, url):
		"""Scrape the direct file URL and title for a depositfiles page."""
		# At this point we have a new file
		self._downloader.increment_downloads()

		file_id = url.split('/')[-1]
		# Rebuild url in english locale
		url = 'http://depositfiles.com/en/files/' + file_id

		# Retrieve file webpage with 'Free download' button pressed
		# (POSTing gateway_result=1 makes the page include the form with
		# the real download URL)
		free_download_indication = { 'gateway_result' : '1' }
		request = urllib2.Request(url, urllib.urlencode(free_download_indication))
		try:
			self.report_download_webpage(file_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
			return

		# Search for the real file URL
		mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
		if (mobj is None) or (mobj.group(1) is None):
			# Try to figure out reason of the error: the site shows an
			# "Attention..." notice when the download is restricted.
			mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
			if (mobj is not None) and (mobj.group(1) is not None):
				# Collapse whitespace in the multi-line HTML message
				restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
				self._downloader.trouble(u'ERROR: %s' % restriction_message)
			else:
				self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
			return

		file_url = mobj.group(1)
		# File extension taken from the direct URL, without the leading dot
		file_extension = os.path.splitext(file_url)[1][1:]

		# Search for file title
		mobj = re.search(r'<b title="(.*?)">', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		file_title = mobj.group(1).decode('utf-8')

		try:
			# Process file information
			self._downloader.process_info({
				'id':		file_id.decode('utf-8'),
				'url':		file_url.decode('utf-8'),
				'uploader':	u'NA',
				'upload_date':	u'NA',
				'title':	file_title,
				'stitle':	file_title,
				'ext':		file_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError, err:
			self._downloader.trouble(u'ERROR: unable to download file')
2264
class PostProcessor(object):
	"""Base class for post-download processing steps.

	PostProcessor objects are registered with a downloader through its
	add_post_processor() method. After each successful download the
	downloader walks its chain of PostProcessors, calling run() on each
	one: the first receives an initial argument, every later one
	receives whatever the previous run() returned.

	The chain stops as soon as a run() returns None, or when the last
	processor has been called.

	Like InfoExtractor objects, PostProcessors follow a "mutual
	registration" scheme with the downloader that owns them.
	"""

	# Downloader this post processor is attached to, if any.
	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach this post processor to the given downloader."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor on one finished download.

		"information" is a dictionary in the same format the
		InfoExtractors produce, extended with a "filepath" entry that
		names the downloaded file.

		Returning None stops the postprocessing chain; returning a
		dictionary (possibly the received one, with some fields
		changed) passes it to the next PostProcessor in the chain.

		Implementations may also raise PostProcessingError to report a
		failure to the downloader that invoked them.
		"""
		# Default behaviour: pass the information through unchanged.
		return information
2310
2311 ### MAIN PROGRAM ###
2312 if __name__ == '__main__':
2313         try:
2314                 # Modules needed only when running the main program
2315                 import getpass
2316                 import optparse
2317
2318                 # Function to update the program file with the latest version from the repository.
		def update_self(downloader, filename):
			"""Overwrite this script file with the latest stable release.

			Reads the LATEST_VERSION marker from the repository, fetches
			the script for that version and writes it over *filename*,
			exiting the process on any failure.
			"""
			# Note: downloader only used for options
			if not os.access(filename, os.W_OK):
				sys.exit('ERROR: no write permissions on %s' % filename)

			downloader.to_screen('Updating to latest stable version...')
			try:
				# LATEST_VERSION contains the tag name of the newest release
				latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
				latest_version = urllib.urlopen(latest_url).read().strip()
				prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
				newcontent = urllib.urlopen(prog_url).read()
			except (IOError, OSError), err:
				sys.exit('ERROR: unable to download latest version')
			try:
				stream = open(filename, 'w')
				stream.write(newcontent)
				stream.close()
			except (IOError, OSError), err:
				sys.exit('ERROR: unable to overwrite current version')
			downloader.to_screen('Updated to version %s' % latest_version)
2339
2340                 # Parse command line
2341                 parser = optparse.OptionParser(
2342                         usage='Usage: %prog [options] url...',
2343                         version='2010.12.09',
2344                         conflict_handler='resolve',
2345                 )
2346
2347                 parser.add_option('-h', '--help',
2348                                 action='help', help='print this help text and exit')
2349                 parser.add_option('-v', '--version',
2350                                 action='version', help='print program version and exit')
2351                 parser.add_option('-U', '--update',
2352                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2353                 parser.add_option('-i', '--ignore-errors',
2354                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2355                 parser.add_option('-r', '--rate-limit',
2356                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2357                 parser.add_option('-R', '--retries',
2358                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2359                 parser.add_option('--playlist-start',
2360                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2361                 parser.add_option('--playlist-end',
2362                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2363                 parser.add_option('--dump-user-agent',
2364                                 action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False)
2365
2366                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2367                 authentication.add_option('-u', '--username',
2368                                 dest='username', metavar='USERNAME', help='account username')
		# Remaining authentication switches (-u/--username is defined just above).
		authentication.add_option('-p', '--password',
				dest='password', metavar='PASSWORD', help='account password')
		authentication.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option_group(authentication)

		# Video format selection.  --all-formats stores the sentinel code '-1'
		# into the same 'format' destination as -f; the output-template logic
		# below special-cases that value so each format gets its own file name.
		video_format = optparse.OptionGroup(parser, 'Video Format Options')
		video_format.add_option('-f', '--format',
				action='store', dest='format', metavar='FORMAT', help='video format code')
		video_format.add_option('--all-formats',
				action='store_const', dest='format', help='download all available video formats', const='-1')
		video_format.add_option('--max-quality',
				action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
		parser.add_option_group(video_format)

		# Verbosity / simulation.  Every --get-* switch implies both 'quiet'
		# and 'simulate' when the FileDownloader is configured below, so they
		# print exactly one item of metadata and download nothing.
		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
		verbosity.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activates quiet mode', default=False)
		verbosity.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		verbosity.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		verbosity.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		verbosity.add_option('--get-thumbnail',
				action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
		verbosity.add_option('--get-description',
				action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
		verbosity.add_option('--get-filename',
				action='store_true', dest='getfilename', help='simulate, quiet but print output filename', default=False)
		verbosity.add_option('--no-progress',
				action='store_true', dest='noprogress', help='do not print progress bar', default=False)
		verbosity.add_option('--console-title',
				action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False)
		parser.add_option_group(verbosity)
2404
		# Filesystem options.  -t/-l/-A only pick one of the default output
		# templates assembled below; an explicit -o template overrides them
		# (the combination is rejected as an error after parsing).  '-o -' is
		# special-cased later to redirect log output to stderr.
		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
		filesystem.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		filesystem.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		filesystem.add_option('-A', '--auto-number',
				action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
		filesystem.add_option('-o', '--output',
				dest='outtmpl', metavar='TEMPLATE', help='output filename template')
		filesystem.add_option('-a', '--batch-file',
				dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
		filesystem.add_option('-w', '--no-overwrites',
				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
		filesystem.add_option('-c', '--continue',
				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
		filesystem.add_option('--cookies',
				dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
		filesystem.add_option('--no-part',
				action='store_true', dest='nopart', help='do not use .part files', default=False)
		parser.add_option_group(filesystem)

		# Leftover positional arguments are the URLs to download.
		(opts, args) = parser.parse_args()
2427
		# Open appropriate CookieJar
		if opts.cookiefile is None:
			# No --cookies given: keep cookies in memory for this run only.
			jar = cookielib.CookieJar()
		else:
			try:
				# Mozilla/Netscape cookies.txt format; pre-load existing
				# cookies only when the file is present and readable, so a
				# not-yet-created cookie file is not an error.
				jar = cookielib.MozillaCookieJar(opts.cookiefile)
				if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
					jar.load()
			except (IOError, OSError), err:
				sys.exit(u'ERROR: unable to open cookie file')

		# Dump user agent
		if opts.dump_user_agent:
			# Print the User-Agent string the program would send, then stop.
			print std_headers['User-Agent']
			sys.exit(0)

		# General configuration
		# Install a process-wide opener so every urllib2 request shares the
		# environment proxy settings, the cookie jar chosen above and the
		# custom YoutubeDLHandler (defined elsewhere in this file).
		cookie_processor = urllib2.HTTPCookieProcessor(jar)
		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2448
		# Batch file verification
		batchurls = []
		if opts.batchfile is not None:
			try:
				if opts.batchfile == '-':
					# '-a -' reads the URL list from standard input.
					batchfd = sys.stdin
				else:
					batchfd = open(opts.batchfile, 'r')
				batchurls = batchfd.readlines()
				batchurls = [x.strip() for x in batchurls]
				# Drop blank lines and lines starting with '#', '/' or ';'
				# (treated as comments in the batch file).
				batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
			except IOError:
				sys.exit(u'ERROR: batch file could not be read')
		# Batch-file URLs are processed before the command-line ones.
		all_urls = batchurls + args
2463
		# Conflicting, missing and erroneous options
		# parser.error() prints the message and terminates the process
		# (exit status 2, per optparse convention).
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
			parser.error(u'using output template conflicts with using title, literal title or auto number')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		if opts.username is not None and opts.password is None:
			# Username given without a password: prompt for it (no echo).
			opts.password = getpass.getpass(u'Type account password and press return:')
		if opts.ratelimit is not None:
			# parse_bytes presumably accepts suffixed sizes (e.g. '50k');
			# it returns None on malformed input.
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit
		if opts.retries is not None:
			try:
				opts.retries = long(opts.retries)
			except (TypeError, ValueError), err:
				parser.error(u'invalid retry count specified')
		try:
			# Playlist start must be a positive (1-based) integer.
			opts.playliststart = long(opts.playliststart)
			if opts.playliststart <= 0:
				raise ValueError
		except (TypeError, ValueError), err:
			parser.error(u'invalid playlist start number specified')
		try:
			# Playlist end must be >= the start position; -1 is accepted as a
			# sentinel (presumably meaning 'until the end of the playlist').
			opts.playlistend = long(opts.playlistend)
			if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
				raise ValueError
		except (TypeError, ValueError), err:
			parser.error(u'invalid playlist end number specified')
2497
		# Information extractors
		# Several extractors take another extractor as a constructor argument
		# (playlist/user/search IEs receive the corresponding site IE,
		# presumably to delegate the per-video extraction to it).
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		dailymotion_ie = DailymotionIE()
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)
		google_ie = GoogleIE()
		google_search_ie = GoogleSearchIE(google_ie)
		photobucket_ie = PhotobucketIE()
		yahoo_ie = YahooIE()
		yahoo_search_ie = YahooSearchIE(yahoo_ie)
		deposit_files_ie = DepositFilesIE()
		generic_ie = GenericIE()
2512
		# File downloader
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			# Any --get-* switch forces quiet mode, so the requested item is
			# the only thing printed to stdout...
			'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'forcethumbnail': opts.getthumbnail,
			'forcedescription': opts.getdescription,
			'forcefilename': opts.getfilename,
			# ...and also forces simulation, so nothing is downloaded.
			'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
			'format': opts.format,
			'format_limit': opts.format_limit,
			# and/or chain: the first truthy clause wins.  An explicit -o
			# template (decoded from the locale's byte encoding, this being
			# Python 2) takes precedence; otherwise a template is chosen from
			# the --all-formats ('-1'), -t, -l and -A combinations, falling
			# back to plain '%(id)s.%(ext)s'.
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
				or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'retries': opts.retries,
			'continuedl': opts.continue_dl,
			'noprogress': opts.noprogress,
			'playliststart': opts.playliststart,
			'playlistend': opts.playlistend,
			# '-o -' presumably writes the video to stdout, so log messages
			# must go to stderr instead -- TODO confirm in FileDownloader.
			'logtostderr': opts.outtmpl == '-',
			'consoletitle': opts.consoletitle,
			'nopart': opts.nopart,
			})
		# Register the extractors.  Registration order matters (the generic
		# one must be last -- see below), so the more specific matchers such
		# as search/playlist/user come before the plain site extractors.
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(dailymotion_ie)
		fd.add_info_extractor(youtube_ie)
		fd.add_info_extractor(google_ie)
		fd.add_info_extractor(google_search_ie)
		fd.add_info_extractor(photobucket_ie)
		fd.add_info_extractor(yahoo_ie)
		fd.add_info_extractor(yahoo_search_ie)
		fd.add_info_extractor(deposit_files_ie)

		# This must come last since it's the
		# fallback if none of the others work
		fd.add_info_extractor(generic_ie)
2565
		# Update version
		if opts.update_self:
			# update_self() receives the path this script was invoked as.
			update_self(fd, sys.argv[0])

		# Maybe do nothing
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				# --update with no URLs is a complete, successful run.
				sys.exit()
		retcode = fd.download(all_urls)

		# Dump cookie jar if requested
		if opts.cookiefile is not None:
			try:
				jar.save()
			except (IOError, OSError), err:
				sys.exit(u'ERROR: unable to save cookie jar')

		# Propagate the downloader's return code as the process exit status.
		sys.exit(retcode)
2586
	# Top-level handlers: translate expected failures into clean exit codes
	# instead of tracebacks.
	except DownloadError:
		# The failure was already handled where it was raised; just fail.
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')